pax_global_header00006660000000000000000000000064145717076740014534gustar00rootroot0000000000000052 comment=481eb47850ad2ff69e2016e695371f9476396435 netconsd-0.4.1/000077500000000000000000000000001457170767400133535ustar00rootroot00000000000000netconsd-0.4.1/.github/000077500000000000000000000000001457170767400147135ustar00rootroot00000000000000netconsd-0.4.1/.github/workflows/000077500000000000000000000000001457170767400167505ustar00rootroot00000000000000netconsd-0.4.1/.github/workflows/ci.yml000066400000000000000000000014521457170767400200700ustar00rootroot00000000000000name: Continuous Integration on: push: branches: [main] pull_request: jobs: build: name: Build netconsd runs-on: ubuntu-latest strategy: matrix: include: - cc: gcc cxx: g++ - cc: clang cxx: clang++ env: CC: ${{ matrix.cc }} CXX: ${{ matrix.cxx }} steps: - name: Checkout repository uses: actions/checkout@v2 - name: Build netconsd run: make - name: Build netconsblaster run: make -C util markdown: name: Markdown runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v2 - name: Lint Markdown uses: actionshub/markdownlint@2.0.2 - name: Check links uses: gaurav-nelson/github-action-markdown-link-check@1.0.13 netconsd-0.4.1/.gitignore000066400000000000000000000002111457170767400153350ustar00rootroot00000000000000*.d *.o netconsd modules/*.so util/netconsblaster /netconsd-*.tar.gz /netconsd-*.src.rpm /netconsd.spec Cargo.lock /target libnetconsd.a netconsd-0.4.1/.mdlrc000066400000000000000000000000551457170767400144550ustar00rootroot00000000000000rules '~MD013', '~MD014', '~MD029', '~MD034' netconsd-0.4.1/.packit.yaml000066400000000000000000000027671457170767400156040ustar00rootroot00000000000000# See the documentation for more information: # https://packit.dev/docs/configuration/ specfile_path: netconsd.spec files_to_sync: - netconsd.spec - .packit.yaml upstream_package_name: netconsd downstream_package_name: netconsd actions: # Fetch the specfile from Rawhide, drop any patches 
and disable rpmautospec post-upstream-clone: "bash -c \"curl -s https://src.fedoraproject.org/rpms/netconsd/raw/main/f/netconsd.spec | sed -e '/^Patch[0-9]/d' -e '/^%autochangelog$/d' > netconsd.spec\"" srpm_build_deps: - bash - curl - sed jobs: - job: copr_build trigger: commit owner: "@meta" project: netconsd targets: - fedora-all-aarch64 - fedora-all-i386 - fedora-all-ppc64le - fedora-all-s390x - fedora-all-x86_64 - fedora-eln-aarch64 - fedora-eln-i386 - fedora-eln-ppc64le - fedora-eln-s390x - fedora-eln-x86_64 - epel-8-aarch64 - epel-8-ppc64le - epel-8-s390x - epel-8-x86_64 - epel-9-aarch64 - epel-9-ppc64le - epel-9-s390x - epel-9-x86_64 - job: copr_build trigger: pull_request owner: "@meta" project: netconsd targets: - fedora-all-aarch64 - fedora-all-i386 - fedora-all-ppc64le - fedora-all-s390x - fedora-all-x86_64 - fedora-eln-aarch64 - fedora-eln-i386 - fedora-eln-ppc64le - fedora-eln-s390x - fedora-eln-x86_64 - epel-8-aarch64 - epel-8-ppc64le - epel-8-s390x - epel-8-x86_64 - epel-9-aarch64 - epel-9-ppc64le - epel-9-s390x - epel-9-x86_64 netconsd-0.4.1/CODE_OF_CONDUCT.md000066400000000000000000000064331457170767400161600ustar00rootroot00000000000000# Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at . 
All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq netconsd-0.4.1/CONTRIBUTING.md000066400000000000000000000024061457170767400156060ustar00rootroot00000000000000# Contributing to netconsd We want to make contributing to this project as easy and transparent as possible. ## Our Development Process This repository is synced from an internal repository. We gladly accept pull requests and will deal with the merging appropriately. ## Contributor License Agreement ("CLA") In order to accept your pull request, we need you to submit a CLA. You only need to do this once to work on any of Facebook's open source projects. Complete your CLA here: ## Issues We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue. Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe disclosure of security bugs. In those cases, please go through the process outlined on that page and do not file a public issue. ## Sending a pull request Have a fix or feature? Awesome! When you send the pull request we suggest you include a build output. 
We will hold all contributions to the same quality and style standards as the existing code. ## License By contributing to this repository, you agree that your contributions will be licensed in accordance to the LICENSE document in the root of this repository. netconsd-0.4.1/Cargo.toml000066400000000000000000000004161457170767400153040ustar00rootroot00000000000000[package] name = "netconsd" version = "0.1.0" edition = "2021" links="netconsd" [[bin]] name = "netconsd" path = "main.rs" [dependencies] libc = "0.2.135" clap = {version = "3.2.22", features = ["derive"] } anyhow = "1.0.66" [profile.release] lto = true strip = true netconsd-0.4.1/LICENSE000066400000000000000000000027101457170767400143600ustar00rootroot00000000000000Copyright (c) Meta Platforms, Inc. and affiliates. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
# Top-level build for netconsd.
#
#   make            build the daemon and the output modules
#   make rlib       build libnetconsd.a (used by the Rust build script)
#   make debug      -O0 build with ASAN/UBSAN
#   make 32bit      force -m32
#   make debug32    debug + 32bit
#   make disasm     emit annotated assembly (*.s)
#   make utils      build the tools in util/
#   make clean      remove build products

CC ?= gcc

LIBS = -lpthread -lrt -ldl
CFLAGS ?= -O2 -fPIC
CFLAGS += -D_GNU_SOURCE -fno-strict-aliasing -Wall -Wextra \
          -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations \
          -Wdeclaration-after-statement -Wno-missing-field-initializers \
          -Wno-unused-parameter
CPPFLAGS ?=
INCLUDES = -Incrx

# Target-specific flag additions
debug debug32: CFLAGS += -O0 -gdwarf-4 -fno-omit-frame-pointer \
                         -fstack-protector-all -fsanitize=address \
                         -fsanitize=undefined
debug debug32: LDFLAGS ?= -lasan -lubsan
32bit: CFLAGS += -m32
32bit: LDFLAGS ?= -m32
disasm: CFLAGS += -fverbose-asm

binary = netconsd
lib = ncrx/libncrx.o
liball = libnetconsd.a
obj = threads.o listener.o worker.o output.o main.o
rlibobj = threads.o listener.o worker.o output.o
asm = $(obj:.o=.s)

# These targets name actions, not files: without .PHONY a stray file or
# directory called e.g. "clean" or "debug" would make them silent no-ops.
.PHONY: all rlib 32bit debug debug32 disasm mods utils clean

all: $(binary) mods
rlib: $(liball)
32bit: $(binary) mods
debug: all
debug32: 32bit
disasm: $(asm)

# Auto-generated header dependencies (written by the %.o rule below)
-include $(obj:.o=.d)

$(binary): $(lib) $(obj)
	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(lib) $(obj) $(LIBS) -o $@

$(liball): $(rlibobj) $(lib)
	ar rc $@ $(rlibobj) $(lib)

%.o: %.c
	$(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -o $@
	$(CC) -MM $< $(INCLUDES) > $(@:.o=.d)

%.s: %.c
	$(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -S -o $@

$(lib):
	$(MAKE) -e -C ncrx

mods:
	$(MAKE) -e -C modules

utils:
	$(MAKE) -e -C util

clean:
	rm -f netconsd *.o *.d *.s
	rm -f modules/*.o modules/*.so
	rm -f ncrx/*.o ncrx/*.d
	rm -f util/netconsblaster
	rm -f libnetconsd.a
netconsd-0.4.1/README.md000066400000000000000000000175341457170767400146440ustar00rootroot00000000000000# Netconsd: The Netconsole Daemon [![Continuous Integration](https://github.com/facebook/netconsd/workflows/Continuous%20Integration/badge.svg?event=push)](https://github.com/facebook/netconsd/actions?query=workflow%3A%22Continuous+Integration%22) This is a daemon for receiving and processing logs from the Linux Kernel, as emitted over a network by the kernel's netconsole module. It supports both the old "legacy" text-only format, and the new extended format added in v4.4. The core of the daemon does nothing but process messages and drop them: in order to make the daemon useful, the user must supply one or more "output modules". These modules are shared object files which expose a small ABI that is called by netconsd with the content and metadata for netconsole messages it receives. This README explains how to build netconsd and use it with one of the existing output modules in the modules/ directory. The end discusses how to write your own custom output module. ## Building netconsd The default Makefile target intended for production use has no external dependencies besides glibc. To build it, just say `make`: you'll end up with a single executable in this directory called `netconsd`, and a `*.so` file for every module in the `modules/` directory. The Makefile includes a few other handy targets: * `debug`: Adds the usual debug flags, and also enables the ASAN and UBSAN sanitizers. You'll need to install libasan/libubsan on your system to build this target and run the binaries. * `32bit`: Forces 32-bit compilation on x86_64 systems, for easily testing portability to 32-bit CPU architectures. You'll need to install 32-bit libraries if your distro doesn't have them. * `debug32`: Union of the `32bit` and `debug` targets. * `disasm`: Emits verbose annotated disassembly in `*.s` files. 
If you want to build the daemon with clang, just append `CC="clang"` to your make invocation. All the above targets should build with both clang and gcc. ### Rust An executable can also be built using `cargo build`. The Rust main function has not been production-tested and it's intended to be used as an experiment, or to facilitate an incremental conversion of C code to Rust. ## Running netconsd ### Setting up the server By default, netconsd will use 1 listener and 2 worker threads, and listen on port 1514 for messages. You can use `-l`, `-w`, and `-u` respectively to change the defaults. There's no universal wisdom about how many threads to use: just experiment with different numbers and use netconsblaster to load up the server. Both the blaster and the server will print how many packets they sent/processed. If you run out of memory and OOM, you need more workers; if you see messages being dropped, you need more listeners. The tuning here will obviously depend on what your output module does: make sure to pass it when you do your testing. For the simplest setup, just run: ``` $ make -s $ ./netconsd ./modules/printer.so ``` Netconsd will always listen on `INADDR_ANY` and `IN6ADDR_ANY`. So far there's been no reason to make that configurable: if you care, open an issue and we will. ### Setting up the client The netconsole module takes a parameter like this: ``` netconsole=[+][r]${sport}@${saddr}/${intf},${dport}@${daddr}/${dmac} ``` The fields are as follows: 1. `sport`: Source port for the netconsole UDP packets 2. `saddr`: Source address for the netconsole UDP packets 3. `intf`: The name of the interface to send the UDP packets from 4. `dport`: Destination port for the netconsole UDP packets 5. `daddr`: Destination address for the netconsole UDP packets 6. 
You can use the following quick shell one-liners to easily get the MAC of the
router:

* IPv6: `ip -6 neighbor show | grep router`
* IPv4: `sudo arp -a | grep gateway`
`int netconsd_output_init(int worker_thread_count)`
2. `void netconsd_output_handler(int thread, struct in6_addr *src, struct msg_buf *buf, struct ncrx_msg *msg)`
3. `void netconsd_output_exit(void)`

If (1) exists, it is called when your module is loaded: the argument tells you
how many worker threads netconsd is going to call your module from. If you
return non-zero from this function, netconsd will `abort()` and exit.

If (3) exists, it is called when netconsd unloads your module.

For every message it receives, netconsd will call (2) in your module. The code
must be reentrant: `netconsd_output_handler()` will be called concurrently from
all of the worker threads in netconsd. The `thread` argument tells you which
worker is invoking the function, which makes it easy to have per-thread data.

Netconsd uses a consistent hash to decide which worker to pass messages to, so
messages from the same remote address will always be queued to the same thread.

The `src` argument will always point to an `in6_addr` struct containing the
source address of the netconsole packet. If the source was an IPv4 address, it
will be formatted as an IPv4-mapped IPv6 address, i.e. `::FFFF:<IPv4 address>`
(see `man ipv6` for details).

If the message had extended metadata, `msg` will point to the `ncrx_msg` struct
containing that metadata and `buf` will be `NULL`. Otherwise, `msg` will be
`NULL` and `buf` will point to a `msg_buf` struct with the raw message text.
/// Cargo build script: builds the C side of netconsd as a static library
/// (`make clean rlib` produces `libnetconsd.a` in the crate root) and tells
/// cargo to link it into the Rust binary.
///
/// Panics, failing the cargo build, if `make` cannot be started or if it
/// exits unsuccessfully.
fn main() {
    // `.expect()` only covers failure to spawn the process ...
    let status = Command::new("make")
        .arg("clean")
        .arg("rlib")
        .status()
        .expect("make build failed");

    // ... so the exit status has to be checked separately; otherwise a broken
    // C build is only noticed later as a missing-library link error.
    if !status.success() {
        panic!("make build failed: {}", status);
    }

    println!("cargo:rustc-link-search=native=./");
    println!("cargo:rustc-link-lib=static=netconsd");
}
/*
 * Clamp val into [lo, hi] using the type-checked min()/max() macros defined
 * earlier in this header. The result of max() is cast back to typeof(val)
 * before being compared against hi.
 */
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)

/*
 * Given a pointer to a member embedded in a struct, return a pointer to the
 * enclosing struct.
 * @ptr:    pointer to the member
 * @type:   type of the enclosing struct
 * @member: name of the member within @type
 *
 * The temporary __mptr makes the compiler check that ptr really has the
 * member's type before the offset is subtracted.
 */
#define container_of(ptr, type, member) ({ \
	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
	(type *)( (char *)__mptr - __builtin_offsetof(type,member) );})

/*
 * Allocate n zero-filled bytes. Returns whatever calloc() returns, so the
 * caller must check for NULL and release the memory with free().
 */
static inline void *zalloc(size_t n)
{
	return calloc(1, n);
}

/*
 * Abort (via fatal_on) unless mutex m is currently locked, detected by
 * pthread_mutex_trylock() returning EBUSY. If the mutex was in fact free the
 * trylock acquires it, but the process aborts right afterwards anyway.
 */
#define assert_pthread_mutex_locked(m) \
do { \
	fatal_on(pthread_mutex_trylock(m) != EBUSY, "UNLOCKED!\n"); \
} while (0)

/*
 * Read the given clock and return its value in milliseconds.
 * Aborts (via fatal_on) if clock_gettime() fails.
 */
static inline uint64_t now_ms(clockid_t clock)
{
	struct timespec t;
	int ret;

	ret = clock_gettime(clock, &t);
	fatal_on(ret, "Oops, clock_gettime() barfed: %m (-%d)\n", errno);
	/* seconds -> ms plus nanoseconds -> ms (sub-millisecond part truncated) */
	return t.tv_sec * 1000LL + t.tv_nsec / 1000000L;
}

/* Current CLOCK_MONOTONIC time in milliseconds. */
static inline uint64_t now_mono_ms(void)
{
	return now_ms(CLOCK_MONOTONIC);
}

/* Current CLOCK_REALTIME (wall-clock) time in milliseconds. */
static inline uint64_t now_real_ms(void)
{
	return now_ms(CLOCK_REALTIME);
}

/*
 * Runtime configuration; main.c fills it from the command line.
 */
struct netconsd_params {
	int nr_workers;			/* -w: number of worker threads */
	int nr_listeners;		/* -l: number of listener threads */
	int mmsg_batch;			/* -b: presumably packets per recvmmsg() call - confirm in threads.c */
	unsigned int gc_int_ms;		/* -g INTERVAL/AGE: GC interval, in ms */
	unsigned int gc_age_ms;		/* -g INTERVAL/AGE: GC age, in ms (must be >= interval) */
	struct sockaddr_in6 listen_addr;	/* -a address and -u port to listen on */
};
/*
 * Rotate a 32-bit word left by @shift bits.
 *
 * Both shift counts are masked to 0..31, so a shift of 0 (or any multiple of
 * 32) returns the word unchanged instead of evaluating "word >> 32", which is
 * undefined behaviour in C.
 */
static inline uint32_t rol32(uint32_t word, unsigned int shift)
{
	return (word << (shift & 31)) | (word >> ((-shift) & 31));
}

/* Best hash sizes are of power of two */
#define jhash_size(n)   ((uint32_t)1<<(n))
/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
#define jhash_mask(n)   (jhash_size(n)-1)

/* __jhash_mix -- mix 3 32-bit values reversibly. */
#define __jhash_mix(a, b, c)			\
{						\
	a -= c;  a ^= rol32(c, 4);  c += b;	\
	b -= a;  b ^= rol32(a, 6);  a += c;	\
	c -= b;  c ^= rol32(b, 8);  b += a;	\
	a -= c;  a ^= rol32(c, 16); c += b;	\
	b -= a;  b ^= rol32(a, 19); a += c;	\
	c -= b;  c ^= rol32(b, 4);  b += a;	\
}

/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
#define __jhash_final(a, b, c)			\
{						\
	c ^= b; c -= rol32(b, 14);		\
	a ^= c; a -= rol32(c, 11);		\
	b ^= a; b -= rol32(a, 25);		\
	c ^= b; c -= rol32(b, 16);		\
	a ^= c; a -= rol32(c, 4);		\
	b ^= a; b -= rol32(a, 14);		\
	c ^= b; c -= rol32(b, 24);		\
}

/*
 * Arbitrary initial parameters
 */
#define JHASH_INITVAL		0xdeadbeef
#define LISTEN_SEED		0xfaceb00c
#define WORKER_SEED		0xb00cface

/* jhash2 - hash an array of uint32_t's
 * @k: the key which must be an array of uint32_t's
 * @length: the number of uint32_t's in the key
 * @initval: the previous hash, or an arbitrary value
 *
 * Returns the hash value of the key. For a zero-length key no mixing is
 * done and the result is simply JHASH_INITVAL + initval.
 */
static inline __attribute__((pure)) uint32_t jhash2(const uint32_t *k,
						    uint32_t length,
						    uint32_t initval)
{
	uint32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INITVAL + (length<<2) + initval;

	/* Handle most of the key, three words at a time */
	while (length > 3) {
		a += k[0];
		b += k[1];
		c += k[2];
		__jhash_mix(a, b, c);
		length -= 3;
		k += 3;
	}

	/* Handle the last 3 uint32_t's: all the case statements fall through */
	switch (length) {
	case 3:
		c += k[2];
		__attribute__((fallthrough));
	case 2:
		b += k[1];
		__attribute__((fallthrough));
	case 1:
		a += k[0];
		__jhash_final(a, b, c);
		break;
	case 0:
		/* Nothing left to add */
		break;
	}

	return c;
}
/*
 * Logging helpers. All of them take printf-style arguments whose first
 * argument must be a string literal: __log() pastes it onto the prefix.
 */

/* Log a FATAL message and abort() the process. Does not return. */
#define fatal(...) \
do { \
	__log("FATAL: ", __VA_ARGS__); \
	abort(); \
} while (0)

/* Log a WARNING message. */
#define warn(...) \
do { \
	__log("WARNING: ", __VA_ARGS__); \
} while (0)

/* Log an INFO message. */
#define log(...) \
do { \
	__log("INFO: ", __VA_ARGS__); \
} while (0)

/* Log a DEBUG message; compiles to nothing unless DEBUG is defined. */
#ifdef DEBUG
#define debug(...) \
do { \
	__log("DEBUG: ", __VA_ARGS__); \
} while (0)
#else
#define debug(...) do {} while (0)
#endif

/*
 * fatal() if cond is true. cond is normalised with !! so that pointers and
 * 64-bit values can be passed without being mangled by the conversion to the
 * 'long' parameter of __builtin_expect().
 */
#define fatal_on(cond, ...) \
do { \
	if (__builtin_expect(!!(cond), 0)) { \
		fatal(__VA_ARGS__); \
	} \
} while (0)

/* Log the message only the first time this call site is reached. */
#define log_once(...) \
do { \
	static int _t; \
	if (__builtin_expect(!_t, 0)) { \
		log(__VA_ARGS__); \
		_t = -1; \
	} \
} while (0)

/*
 * Log the message on every n-th pass through this call site (the n-th,
 * 2n-th, ... call). The per-site counter is reset whenever it fires, so it
 * can never overflow; n <= 1 logs on every call. The counter is a plain
 * static with no locking, so the cadence is only approximate if several
 * threads share a call site.
 */
#define log_every(n, ...) \
do { \
	static long _calls; \
	if (__builtin_expect(++_calls >= (long)(n), 0)) { \
		_calls = 0; \
		log(__VA_ARGS__); \
	} \
} while (0)
* * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #ifndef __OUTPUT_H__ #define __OUTPUT_H__ #include #include "msgbuf-struct.h" #define MAXOUTS 32 int register_output_module(char *path, int nr_workers); void destroy_output_modules(void); void execute_output_pipeline(int thread_nr, struct in6_addr *src, struct msg_buf *buf, struct ncrx_msg *msg); #endif /* __OUTPUT_H__ */ netconsd-0.4.1/include/threads.h000066400000000000000000000010141457170767400165750ustar00rootroot00000000000000/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #ifndef __NCRX_THREADS_H__ #define __NCRX_THREADS_H__ #include "msgbuf-struct.h" #include "common.h" struct tctl; struct ncrx_listener; void enqueue_and_wake_all(struct ncrx_listener *listener); struct tctl *create_threads(struct netconsd_params *p); void destroy_threads(struct tctl *ctl); #endif /* __NCRX_THREADS_H__ */ netconsd-0.4.1/include/worker.h000066400000000000000000000016121457170767400164600ustar00rootroot00000000000000/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ #ifndef __WORKER_H__ #define __WORKER_H__ #include #include "msgbuf-struct.h" /* * How long to wait for messages before giving up, in milliseconds */ #define NETCONS_RTO 200 struct hashtable; struct timerlist; struct ncrx_worker { struct msg_buf *queue_head; struct msg_buf *queue_tail; pthread_t id; pthread_condattr_t condattr; pthread_cond_t cond; pthread_mutex_t queuelock; int nr_queued; struct hashtable *ht; struct timerlist *tlist; struct timespec wake; unsigned int gc_age_ms; unsigned int gc_int_ms; uint64_t lastgc; uint64_t processed; uint64_t hosts_seen; int thread_nr; /* * Flags */ unsigned stop:1; }; void *ncrx_worker_thread(void *arg); #endif netconsd-0.4.1/listener.c000066400000000000000000000075041457170767400153520ustar00rootroot00000000000000/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include #include #include #include "include/common.h" #include "include/msgbuf-struct.h" #include "include/threads.h" #include "include/listener.h" static void handle_listen_error(int err) { switch(err) { case EINTR: /* * The fact that we got an error return means that recvmmsg() * hadn't actually done anything, so we can just loop back over * the call no problem. 
*/ return; case 0: fatal("Unexpected EOF from recvmmsg()\n"); default: fatal("Unexpected listen error: %m (-%d)\n", errno); } } static struct msg_buf *msgbuf_from_iovec(struct iovec *vecptr) { return container_of(vecptr, struct msg_buf, iovec); } static unsigned long hash_srcaddr(struct in6_addr *addr) { uint32_t *addrptr = (uint32_t *)addr; return jhash2(addrptr, sizeof(*addr) / sizeof(*addrptr), LISTEN_SEED); } static void prequeue_msgbuf(struct ncrx_listener *listener, struct msg_buf *buf) { struct ncrx_prequeue *prequeue; unsigned long hash; hash = hash_srcaddr(&buf->src.sin6_addr); prequeue = &listener->prequeues[hash % listener->nr_workers]; if (prequeue->queue_head) prequeue->queue_tail->next = buf; else prequeue->queue_head = buf; prequeue->queue_tail = buf; prequeue->count++; } static void reinit_mmsghdr_vec(struct mmsghdr *vec, int nr, int rcvbufsz) { struct msg_buf *cur; int i; memset(vec, 0, sizeof(*vec) * nr); for (i = 0; i < nr; i++) { cur = malloc(sizeof(*cur) + rcvbufsz); if (!cur) fatal("-ENOMEM after %d/%d rcvbufs\n", i, nr); memset(cur, 0, sizeof(*cur)); cur->buf[rcvbufsz - 1] = '\0'; cur->iovec.iov_base = &cur->buf; cur->iovec.iov_len = rcvbufsz - 1; vec[i].msg_hdr.msg_iov = &cur->iovec; vec[i].msg_hdr.msg_iovlen = 1; vec[i].msg_hdr.msg_name = &cur->src; vec[i].msg_hdr.msg_namelen = sizeof(cur->src); } } static struct mmsghdr *alloc_mmsghdr_vec(int nr, int rcvbufsz) { struct mmsghdr *mmsgvec; mmsgvec = malloc(sizeof(*mmsgvec) * nr); if (!mmsgvec) fatal("Unable to allocate mmsghdr array\n"); reinit_mmsghdr_vec(mmsgvec, nr, rcvbufsz); return mmsgvec; } static void free_mmsghdr_vec(struct mmsghdr *vec, int nr) { struct msg_buf *cur; int i; for (i = 0; i < nr; i++) { cur = msgbuf_from_iovec(vec[i].msg_hdr.msg_iov); free(cur); } free(vec); } static int get_listen_socket(struct sockaddr_in6 *bindaddr) { int fd, ret, optval = 1; fd = socket(AF_INET6, SOCK_DGRAM, 0); if (fd == -1) fatal("Couldn't get socket: %m\n"); ret = setsockopt(fd, SOL_SOCKET, 
SO_REUSEPORT, &optval, sizeof(optval)); if (ret == -1) fatal("Couldn't set SO_REUSEPORT on socket: %m\n"); ret = bind(fd, bindaddr, sizeof(*bindaddr)); if (ret == -1) fatal("Couldn't bind: %m\n"); return fd; } void *udp_listener_thread(void *arg) { int fd, nr_recv, i; uint64_t now; struct ncrx_listener *us = arg; struct mmsghdr *vec; struct msg_buf *cur; fd = get_listen_socket(us->address); vec = alloc_mmsghdr_vec(us->batch, RCVBUF_SIZE); while (!us->stop) { nr_recv = recvmmsg(fd, vec, us->batch, MSG_WAITFORONE, NULL); if (nr_recv <= 0) { handle_listen_error(errno); continue; } debug("recvmmsg() got %d packets\n", nr_recv); now = now_real_ms(); for (i = 0; i < nr_recv; i++) { cur = msgbuf_from_iovec(vec[i].msg_hdr.msg_iov); cur->rcv_flags = vec[i].msg_hdr.msg_flags; cur->rcv_bytes = vec[i].msg_len; cur->rcv_time = now; prequeue_msgbuf(us, cur); us->processed++; } enqueue_and_wake_all(us); reinit_mmsghdr_vec(vec, nr_recv, RCVBUF_SIZE); } free_mmsghdr_vec(vec, us->batch); return NULL; } netconsd-0.4.1/main.c000066400000000000000000000066771457170767400144630ustar00rootroot00000000000000/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include #include #include #include "include/common.h" #include "include/output.h" #include "include/threads.h" #include "include/listener.h" static void parse_arguments(int argc, char **argv, struct netconsd_params *p) { int i; char *tmp; static const char *optstr = "hw:l:b:a:u:g:"; static const struct option optlong[] = { { .name = "help", .has_arg = no_argument, .val = 'h', }, { .name = NULL, }, }; while (1) { i = getopt_long(argc, argv, optstr, optlong, NULL); switch (i) { case 'w': p->nr_workers = atoi(optarg); break; case 'l': p->nr_listeners = atoi(optarg); break; case 'b': p->mmsg_batch = atoi(optarg); break; case 'a': if (!inet_pton(AF_INET6, optarg, &p->listen_addr.sin6_addr)) fatal("invalid listen address\n"); debug("listening for address %s\n", optarg); break; case 'u': p->listen_addr.sin6_port = htons(atoi(optarg)); break; case 'g': tmp = index(optarg, '/'); if (!tmp) fatal("'-g' expects 'INTERVAL/AGE' in ms\n"); p->gc_int_ms = atoi(optarg); p->gc_age_ms = atoi(tmp + 1); if (p->gc_age_ms < p->gc_int_ms) fatal("GC age must be >= GC interval\n"); break; case -1: goto done; case 'h': printf("Usage: %s [-w workers] [-l listeners] " "[-b mmsg_batch] [-a udp_listen_addr] [-u udp_listen_port] " "[-g '${interval}/${age}'] [output module path] " "[another output module path...]\n", argv[0]); exit(0); default: exit(1); } } done: /* * Register output modules */ if (optind == argc) warn("You passed no output modules, which is sort of silly\n"); if (argc - optind > MAXOUTS) fatal("Too many output mods: %d>%d\n", argc - optind, MAXOUTS); for (i = optind; i < argc; i++) if (register_output_module(argv[i], p->nr_workers)) fatal("Can't register '%s'\n", argv[i]); } /* * This exists to kick the blocking recvmmsg() call in the listener threads, so * they get -EINTR, notice the stop flag, and terminate. 
* * See also: stop_and_wait_for_listeners() in threads.c */ static void interrupter_handler(int sig) { return; } /* * Initialize the set of signals for which we try to terminate gracefully. */ static void init_sigset(sigset_t *set) { sigemptyset(set); sigaddset(set, SIGTERM); sigaddset(set, SIGINT); sigaddset(set, SIGHUP); } static void init_sighandlers(void) { struct sigaction ignorer = { .sa_handler = SIG_IGN, }; struct sigaction interrupter = { .sa_handler = interrupter_handler, .sa_flags = SA_NODEFER, }; sigaction(SIGUSR1, &interrupter, NULL); sigaction(SIGPIPE, &ignorer, NULL); } int main(int argc, char **argv) { int num; sigset_t set; struct tctl *ctl; struct netconsd_params params = { .nr_workers = 2, .nr_listeners = 1, .mmsg_batch = 512, .gc_int_ms = 0, .gc_age_ms = 0, .listen_addr = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, .sin6_port = htons(1514), } }; parse_arguments(argc, argv, ¶ms); init_sighandlers(); init_sigset(&set); sigprocmask(SIG_BLOCK, &set, NULL); ctl = create_threads(¶ms); sigwait(&set, &num); log("Signal: '%s', terminating\n", strsignal(num)); destroy_threads(ctl); destroy_output_modules(); return 0; } netconsd-0.4.1/main.rs000066400000000000000000000122241457170767400146460ustar00rootroot00000000000000/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ use anyhow::bail; use anyhow::Error; use clap::Parser; use libc::in6_addr; use libc::sigaction; use libc::sigaddset; use libc::sigemptyset; use libc::sigprocmask; use libc::sigset_t; use libc::sigwait; use libc::sockaddr_in6; use libc::strsignal; use libc::AF_INET6; use libc::SA_NODEFER; use libc::SIGHUP; use libc::SIGINT; use libc::SIGPIPE; use libc::SIGTERM; use libc::SIGUSR1; use libc::SIG_BLOCK; use libc::SIG_IGN; extern "C" { fn register_output_module(path: *const c_char, nr_workers: c_int); fn create_threads(params: &netconsd_params) -> *const c_void; fn destroy_threads(ctl: *const c_void); fn destroy_output_modules(); } #[repr(C)] #[derive(Copy, Clone)] pub struct netconsd_params { pub nr_workers: c_int, pub nr_listeners: c_int, pub mmsg_batch: c_int, pub gc_int_ms: c_uint, pub gc_age_ms: c_uint, pub listen_addr: sockaddr_in6, } #[derive(Debug)] struct GcParams { age: u32, interval: u32, } impl FromStr for GcParams { type Err = Error; fn from_str(s: &str) -> Result { let split: Vec<&str> = s.split("/").collect(); if split.len() != 2 { bail!("Wrong GC format, it should be /"); } let age = match split[0].parse::() { Ok(x) => x, Err(_) => bail!("Could not parse age, it should be an unsigned 32bits integer."), }; let interval = match split[1].parse::() { Ok(x) => x, Err(_) => bail!("Could not parse interval, it should be an unsigned 32bits integer."), }; if age < interval { bail!("GC age should >= GC interval"); } Ok(GcParams { age, interval }) } } #[derive(Parser, Debug)] struct CliArgs { /// Number of worker threads #[clap(short, long)] workers: Option, /// Number of listener threads #[clap(short, long)] listeners: Option, /// Message batch size #[clap(short, long)] batch: Option, /// UDP listen IPV6 address #[clap(short, long)] address: Option, /// UDP listen port #[clap(short = 'u', long)] port: Option, /// Garbage collector interval/age in ms #[clap(short, long)] gc: Option, /// Dynamic modules to load #[clap()] modules: Vec, } /* * This exists to kick 
the blocking recvmmsg() call in the listener threads, so * they get -EINTR, notice the stop flag, and terminate. * * See also: stop_and_wait_for_listeners() in threads.c */ fn interrupter_handler(_sig: c_int) { return; } unsafe fn init_sighandlers() { let ignorer: sigaction = sigaction { sa_sigaction: SIG_IGN, sa_mask: MaybeUninit::zeroed().assume_init(), sa_flags: 0, sa_restorer: None, }; let interrupter = sigaction { sa_sigaction: interrupter_handler as usize, sa_mask: MaybeUninit::zeroed().assume_init(), sa_flags: SA_NODEFER, sa_restorer: None, }; sigaction(SIGUSR1, &interrupter, std::ptr::null_mut::()); sigaction(SIGPIPE, &ignorer, std::ptr::null_mut::()); } /* * Initialize the set of signals for which we try to terminate gracefully. */ unsafe fn init_sigset(set: &mut sigset_t) { sigemptyset(set); sigaddset(set, SIGTERM); sigaddset(set, SIGINT); sigaddset(set, SIGHUP); } fn get_netconsd_params(cli_args: &CliArgs) -> netconsd_params { netconsd_params { nr_workers: cli_args.workers.unwrap_or(2).into(), nr_listeners: cli_args.listeners.unwrap_or(1).into(), mmsg_batch: cli_args.batch.unwrap_or(512).into(), gc_int_ms: cli_args.gc.as_ref().map(|gc| gc.interval).unwrap_or(0), gc_age_ms: cli_args.gc.as_ref().map(|gc| gc.age).unwrap_or(0), listen_addr: sockaddr_in6 { sin6_family: AF_INET6 as u16, sin6_port: cli_args.port.unwrap_or(1514).to_be(), sin6_flowinfo: 0, sin6_addr: in6_addr { s6_addr: cli_args .address .map(|x| x.octets().clone()) .unwrap_or_else(|| [0u8; 16]), }, sin6_scope_id: 0, }, } } fn main() { let cli_args = CliArgs::parse(); let params = get_netconsd_params(&cli_args); unsafe { for module in cli_args.modules.iter() { let path = CString::new(module.as_str()).unwrap(); register_output_module(path.as_ptr(), params.nr_workers); } init_sighandlers(); let mut set: sigset_t = MaybeUninit::zeroed().assume_init(); init_sigset(&mut set); sigprocmask(SIG_BLOCK, &set, std::ptr::null_mut::()); let mut num: c_int = 0; let ctl = create_threads(¶ms); sigwait(&set, &mut 
/*
 * Byte-wise equality for IPv6 addresses, so that struct in6_addr can be used
 * as the key of an unordered_map.
 */
static bool operator==(const struct in6_addr& lhs, const struct in6_addr& rhs)
{
	const int diff = std::memcmp(&lhs, &rhs, 16);

	return !diff;
}
/*
 * Basic struct to hold the hostname and the FD for its logfile.
 *
 * The struct owns @fd: the constructor opens it and the destructor closes it.
 * Copying is therefore disabled, since a copy would close the same descriptor
 * a second time.
 */
struct logtarget {
	char hostname[INET6_ADDRSTRLEN + 1];
	int fd;

	/*
	 * Resolve the hostname, and open() an appropriately named file to
	 * write the logs into.
	 *
	 * If the reverse lookup fails, the textual IPv6 address is used as
	 * the name, and "unknown" if even that cannot be produced. Failure to
	 * open the file aborts the process.
	 */
	logtarget(struct in6_addr *src)
	{
		int ret;
		struct sockaddr_in6 sa = {
			.sin6_family = AF_INET6,
			.sin6_port = 0,
		};

		memcpy(&sa.sin6_addr, src, sizeof(*src));
		ret = getnameinfo((const struct sockaddr *)&sa, sizeof(sa),
				  hostname, sizeof(hostname) - 1, NULL, 0,
				  NI_NAMEREQD);
		if (ret) {
			const char *ptr;

			fprintf(stderr, "getnameinfo failed: %s\n",
				gai_strerror(ret));
			ptr = inet_ntop(AF_INET6, src, hostname,
					INET6_ADDRSTRLEN);
			if (ptr == NULL) {
				fprintf(stderr, "inet_ntop failed: %s\n",
					strerror(errno));
				snprintf(hostname, sizeof(hostname), "unknown");
			}
		}

		/*
		 * The name comes from the resolver and is used as a path
		 * below, so make sure it cannot point outside the current
		 * directory.
		 */
		for (char *c = hostname; *c; c++)
			if (*c == '/')
				*c = '_';

		ret = open(hostname, O_TRUNC|O_WRONLY|O_CREAT, 0644);
		if (ret == -1) {
			fprintf(stderr, "FATAL: open() failed: %m\n");
			abort();
		}

		fd = ret;
	}

	/* One owner per descriptor: no copies */
	logtarget(const logtarget&) = delete;
	logtarget& operator=(const logtarget&) = delete;

	/*
	 * Close the file
	 */
	~logtarget(void)
	{
		close(fd);
	}
};
*/ static struct logtarget& get_target(int thread_nr, struct in6_addr *src) { auto itr = maps[thread_nr].find(*src); if (itr == maps[thread_nr].end()) return maps[thread_nr].emplace(*src, src).first->second; return itr->second; } /* * Actually write the line to the file */ static void write_log(struct logtarget& tgt, struct msg_buf *buf, struct ncrx_msg *msg) { /* legacy non-extended netcons message */ if (!msg) { dprintf(tgt.fd, "%s\n", buf->buf); return; } /* extended netcons msg with metadata */ if (std::strlen(msg->version) > 1) dprintf(tgt.fd, "%s ", msg->version); dprintf(tgt.fd, "%06" PRIu64 " ", msg->seq); dprintf(tgt.fd, "%014" PRIu64 " ", msg->ts_usec); dprintf(tgt.fd, "%d ", msg->facility); dprintf(tgt.fd, "%d ", msg->level); if (msg->cont_start) dprintf(tgt.fd, "[CONT START] "); if (msg->cont) dprintf(tgt.fd, "[CONT] "); if (msg->oos) dprintf(tgt.fd, "[OOS] "); if (msg->seq_reset) dprintf(tgt.fd, "[SEQ RESET] "); dprintf(tgt.fd, "%s\n", msg->text); } extern "C" int netconsd_output_init(int nr) { maps = new std::unordered_map[nr]; return 0; } extern "C" void netconsd_output_exit(void) { delete[] maps; } /* * This is the actual function called by netconsd. */ extern "C" void netconsd_output_handler(int t, struct in6_addr *src, struct msg_buf *buf, struct ncrx_msg *msg) { struct logtarget& cur = get_target(t, src); write_log(cur, buf, msg); } netconsd-0.4.1/modules/printer.c000066400000000000000000000022331457170767400166520ustar00rootroot00000000000000/* printer.c: Very simple example C netconsd module * * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
/*
 * Module init hook: announces how many worker threads it was given.
 *
 * Returns 0 unconditionally.
 */
int netconsd_output_init(int nr_workers)
{
	/* End the line so it doesn't run into the first message that is printed */
	printf("From init hook: %d worker threads\n", nr_workers);
	return 0;
}
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include "ncrx.h" /* oos history is tracked with a uint32_t */ #define NCRX_OOS_MAX 32 struct ncrx_msg_list { struct ncrx_list head; int nr; /* number of msgs on the list */ }; struct ncrx_slot { struct ncrx_msg *msg; uint64_t timestamp; /* last rx on this slot */ uint64_t retx_timestamp; /* last retransmission */ struct ncrx_list hole_node; /* anchored @ ncrx->hole_list */ }; struct ncrx { struct ncrx_param p; uint64_t now_mono; /* latest time in msecs */ int head; /* next slot to use */ int tail; /* last slot in use */ uint64_t head_seq; /* next expected seq, unset=0 */ struct ncrx_slot *slots; /* msg slots */ struct ncrx_list hole_list; /* missing or !complete slots */ uint32_t oos_history; /* bit history of oos msgs */ struct ncrx_msg_list oos_list; /* buffered oos msgs */ struct ncrx_msg_list retired_list; /* msgs to be fetched by user */ uint64_t acked_seq; /* last seq acked, unset=max */ uint64_t acked_at; /* and when */ /* response buffer for ncrx_response() */ char resp_buf[NCRX_PKT_MAX + 1]; int resp_len; }; static const struct ncrx_param ncrx_dfl_param = { .nr_slots = NCRX_DFL_NR_SLOTS, .ack_intv = NCRX_DFL_ACK_INTV, .retx_intv = NCRX_DFL_RETX_INTV, .retx_stride = NCRX_DFL_RETX_STRIDE, .msg_timeout = NCRX_DFL_MSG_TIMEOUT, .oos_thr = NCRX_DFL_OOS_THR, .oos_intv = NCRX_DFL_OOS_INTV, .oos_timeout = NCRX_DFL_OOS_TIMEOUT, }; /* utilities mostly stolen from kernel */ #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ (void) (&_min1 == &_min2); \ _min1 < _min2 ? _min1 : _min2; }) #define max(x, y) ({ \ typeof(x) _max1 = (x); \ typeof(y) _max2 = (y); \ (void) (&_max1 == &_max2); \ _max1 > _max2 ? 
/*
 * Count the bits set in @w.
 *
 * Adjacent bit counts are summed in parallel: first within 2-bit pairs, then
 * 4-bit nibbles, then bytes. The final multiply accumulates the four byte
 * counts into the top byte, which the shift brings down.
 */
static unsigned int hweight32(uint32_t w)
{
	const uint32_t pairs = w - ((w >> 1) & 0x55555555);
	const uint32_t nibbles = (pairs & 0x33333333) +
				 ((pairs >> 2) & 0x33333333);
	const uint32_t bytes = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;

	return (bytes * 0x01010101) >> 24;
}
struct ncrx_msg *msg_list_peek(struct ncrx_msg_list *list) { if (list_empty(&list->head)) return NULL; return node_to_msg(list->head.next); } static struct ncrx_msg *msg_list_pop(struct ncrx_msg_list *list) { struct ncrx_msg *msg; msg = msg_list_peek(list); if (msg) msg_list_del(msg, list); return msg; } /* * Check if we have a kernel version in the very first field */ static int release_prepended(char *ptr) { char *dot_pos, *comma_pos; if (!ptr) return 0; dot_pos = memchr(ptr, '.', NCRX_KVERSION_MAX_LEN); comma_pos = memchr(ptr, ',', NCRX_KVERSION_MAX_LEN); if (!dot_pos || !comma_pos) return 0; if (dot_pos < comma_pos) return 1; return 0; } /* * Parse @payload into @msg. The data is not copied into @msg's buffer. * @msg->text and ->dict are updated to point into @payload instead. */ static int parse_packet(const char *payload, struct ncrx_msg *msg) { char buf[1024]; char *p, *tok; int idx; bool is_frag_seen = false, is_emg_seen = false; memset(msg, 0, sizeof(*msg)); p = strchr(payload, ';'); if (!p || p - payload >= (signed)sizeof(buf)) goto einval; memcpy(buf, payload, p - payload); buf[p - payload] = '\0'; msg->text = p + 1; msg->text_len = strlen(msg->text); if (msg->text_len > NCRX_LINE_MAX) msg->text_len = NCRX_LINE_MAX; /* [release,],,,[,KEY=VAL]* */ p = buf; if (release_prepended(p)) { idx = 0; } else { idx = 1; } while ((tok = strsep(&p, ","))) { char *endp, *key, *val; unsigned long long v; switch (idx++) { case 0: if (!tok) goto einval; strncpy(msg->version, tok, NCRX_KVERSION_MAX_LEN - 1); continue; case 1: v = strtoul(tok, &endp, 0); if (*endp != '\0' || v > UINT8_MAX) goto einval; msg->facility = v >> 3; msg->level = v & ((1 << 3) - 1); continue; case 2: v = strtoull(tok, &endp, 0); if (*endp != '\0') goto einval; msg->seq = v; continue; case 3: v = strtoull(tok, &endp, 0); if (*endp != '\0') goto einval; msg->ts_usec = v; continue; case 4: if (tok[0] == 'c') msg->cont_start = 1; else if (tok[0] == '+') msg->cont = 1; continue; } val = tok; key = 
strsep(&val, "="); if (!val) continue; if (!strcmp(key, "ncfrag")) { unsigned nf_off, nf_len; if (is_frag_seen) goto einval; if (sscanf(val, "%u/%u", &nf_off, &nf_len) != 2) goto einval; if (!msg->text_len || nf_len >= NCRX_LINE_MAX || nf_off >= nf_len || nf_off + msg->text_len > nf_len) goto einval; msg->ncfrag_off = nf_off; msg->ncfrag_len = msg->text_len; msg->ncfrag_left = nf_len - msg->ncfrag_len; msg->text_len = nf_len; is_frag_seen = true; } else if (!strcmp(key, "ncemg")) { if (is_emg_seen) goto einval; v = strtoul(val, &endp, 0); if (*endp != '\0') goto einval; msg->emg = v; is_emg_seen = true; } } return 0; einval: errno = EINVAL; return -1; } /* how far @idx is behind @ncrx->head */ static int slot_dist(int idx, struct ncrx *ncrx) { int dist = ncrx->head - idx; return dist >= 0 ? dist : dist + ncrx->p.nr_slots; } /* number of occupied slots */ static int nr_queued(struct ncrx *ncrx) { return slot_dist(ncrx->tail, ncrx); } /* seq of the last queued message */ static uint64_t tail_seq(struct ncrx *ncrx) { return ncrx->head_seq - nr_queued(ncrx); } /* slot index of a message with sequence number @ncrx->head_seq + @delta */ static int seq_delta_idx(struct ncrx *ncrx, int delta) { int idx = ncrx->head + delta; if (idx < 0) return idx + ncrx->p.nr_slots; else if (idx >= ncrx->p.nr_slots) return idx - ncrx->p.nr_slots; else return idx; } /* is @slot completely empty? 
*/ static int slot_is_free(struct ncrx_slot *slot) { return !slot->msg && list_empty(&slot->hole_node); } /* @slot may have just been completed, if so, remove it from hole_list */ static void slot_maybe_complete(struct ncrx_slot *slot) { struct ncrx_msg *msg = slot->msg; if (!msg || msg->ncfrag_left || list_empty(&slot->hole_node)) return; list_del(&slot->hole_node); } /* retire the last queued slot whether complete or not */ static void retire_tail(struct ncrx *ncrx) { int ntail = (ncrx->tail + 1) % ncrx->p.nr_slots; struct ncrx_slot *slot = &ncrx->slots[ncrx->tail]; struct ncrx_slot *nslot = &ncrx->slots[ntail]; if (slot->msg) { msg_list_append(slot->msg, &ncrx->retired_list); slot->msg = NULL; } list_del(&slot->hole_node); /* free slot is never a hole */ ncrx->tail = ntail; /* * Activities of past msgs are considered activities for newer ones * too. This prevents oos interval verdicts from flipping as * sequence progresses. */ nslot->timestamp = max(slot->timestamp, nslot->timestamp); } /* make room for message with seq ncrx->head_seq + @delta */ static void make_room(struct ncrx *ncrx, int delta) { int i; /* head_seq is for the next msg, need to advance for 0 @delta too */ for (i = 0; i <= delta; i++) { struct ncrx_slot *slot; int max_busy = ncrx->p.nr_slots - ncrx->p.retx_stride; /* a new slot is considered hole until it gets completed */ slot = &ncrx->slots[ncrx->head]; assert(slot_is_free(slot)); list_append(&slot->hole_node, &ncrx->hole_list); slot->timestamp = ncrx->now_mono; slot->retx_timestamp = 0; /* * Wind the ring buffer and push out if overflowed. Always * keep at least one stride empty so that retransmissions * of expired slots don't count as oos. */ ncrx->head_seq++; ncrx->head = (ncrx->head + 1) % ncrx->p.nr_slots; if (slot_dist(ncrx->tail, ncrx) > max_busy) retire_tail(ncrx); } } /* * Get slot for @tmsg. On success, returns pointer to the slot which may * be free or occupied with partial or complete message. 
Returns NULL with * errno set to ERANGE if oos, NULL / ENOENT if already retired. */ static struct ncrx_slot *get_seq_slot(struct ncrx_msg *tmsg, struct ncrx *ncrx) { struct ncrx_slot *slot; int64_t delta; int idx; /* new seq stream */ if (!ncrx->head_seq) { ncrx->head_seq = tmsg->seq; ncrx->acked_seq = UINT64_MAX; tmsg->seq_reset = 1; } delta = tmsg->seq - ncrx->head_seq; /* * Consider oos if outside reorder window or if the slot is * complete and the last activity on it was more than oos_intv ago. * Emergency messages are never considered oos as they don't follow * the usual transmission pattern and may repeat indefinitely. */ if (-delta > ncrx->p.nr_slots || delta > ncrx->p.nr_slots) { errno = ERANGE; return NULL; } idx = seq_delta_idx(ncrx, delta); slot = &ncrx->slots[idx]; if (-delta > nr_queued(ncrx)) { int is_free = slot_is_free(slot); if (!tmsg->emg && (!is_free || slot->timestamp + ncrx->p.oos_intv < ncrx->now_mono)) { errno = ERANGE; return NULL; } if (is_free) slot->timestamp = ncrx->now_mono; errno = ENOENT; return NULL; } make_room(ncrx, delta); slot->timestamp = ncrx->now_mono; return slot; } /* make @src's copy, if @src is a fragment, allocate full size as it may grow */ static struct ncrx_msg *copy_msg(struct ncrx_msg *src) { struct ncrx_msg *dst; assert(!src->dict && !src->dict_len); dst = malloc(sizeof(*dst) + src->text_len + 1); if (!dst) return NULL; *dst = *src; init_list(&dst->node); dst->text = dst->buf; if (src->ncfrag_len) { memset(dst->text, 0, src->text_len + 1); memcpy(dst->text + src->ncfrag_off, src->text, src->ncfrag_len); dst->ncfrag_off = 0; dst->ncfrag_len = 0; } else { memcpy(dst->text, src->text, src->text_len); dst->text[dst->text_len] = '\0'; } return dst; } /* * @tmsg is a newly parsed msg which is out-of-sequence. Queue it on * @ncrx->oos_list until the message times out, gets pushed out by other * oos messages or the sequence stream gets reset. 
*/ static int queue_oos_msg(struct ncrx_msg *tmsg, struct ncrx *ncrx) { struct ncrx_slot *slot; struct ncrx_msg *msg, *nmsg, *first; msg = copy_msg(tmsg); if (!msg) return -1; msg_list_append(msg, &ncrx->oos_list); /* * Shifted left automatically on each new msg. Set oos and see if * there have been too many oos among the last 32 messages. */ ncrx->oos_history |= 1; if ((signed)hweight32(ncrx->oos_history) < ncrx->p.oos_thr) { /* nope, handle oos overflow and handle */ if (ncrx->oos_list.nr > NCRX_OOS_MAX) { msg = msg_list_pop(&ncrx->oos_list); if (msg) { msg->oos = 1; msg_list_append(msg, &ncrx->retired_list); } } return 0; } /* * The current sequence stream seems no good. Let's reset by * retiring all pending, picking the oos msg with the lowest seq, * queueing it to reset the seq and then queueing all other oos * msgs. If a msg is still oos after reset, just retire it. */ while (ncrx->tail != ncrx->head) retire_tail(ncrx); ncrx->head_seq = 0; ncrx->acked_seq = UINT64_MAX; first = node_to_msg(ncrx->oos_list.head.next); msg_list_for_each(msg, nmsg, &ncrx->oos_list) first = msg->seq < first->seq ? 
msg : first; msg_list_del(first, &ncrx->oos_list); slot = get_seq_slot(first, ncrx); slot->msg = first; slot_maybe_complete(slot); while ((msg = msg_list_pop(&ncrx->oos_list))) { slot = get_seq_slot(msg, ncrx); if (slot) { slot->msg = msg; slot_maybe_complete(slot); } else { msg->oos = 1; msg_list_append(msg, &ncrx->retired_list); } } return 0; } /* @payload has just been received, parse and queue it */ static int ncrx_queue_payload(const char *payload, struct ncrx *ncrx, uint64_t now_real) { struct ncrx_msg tmsg = {}; struct ncrx_slot *slot; int new_msg = 0; if (parse_packet(payload, &tmsg)) return -1; tmsg.rx_at_mono = ncrx->now_mono; tmsg.rx_at_real = now_real; ncrx->oos_history <<= 1; /* ack immediately if logging source is doing emergency transmissions */ if (tmsg.emg) { ncrx->acked_seq = UINT64_MAX; ncrx->acked_at = 0; } /* get the matching slot and allocate a new message if empty */ slot = get_seq_slot(&tmsg, ncrx); if (slot && !slot->msg) { slot->msg = copy_msg(&tmsg); new_msg = 1; } if (!slot || !slot->msg) { if (errno == ENOENT) return 0; if (errno == ERANGE) return queue_oos_msg(&tmsg, ncrx); return -1; } if (!new_msg && slot->msg->ncfrag_left) { struct ncrx_msg *msg = slot->msg; int off = tmsg.ncfrag_off; int i; /* * we're merging a text fragment into the message text buffer. * the checks done here ensure that the received fragment values * are within bounds of the message text buffer. */ if (off >= msg->text_len || off + tmsg.ncfrag_len > msg->text_len) { return -1; } for (i = 0; i < tmsg.ncfrag_len; i++) { if (msg->text[off + i]) continue; msg->text[off + i] = tmsg.text[i]; msg->ncfrag_left--; } } slot_maybe_complete(slot); return 0; } /* * Build ncrx_response() output. Ack for the last retired msg is always * added. If @slot is non-NULL, re-transmission for it is also added. */ static void ncrx_build_resp(struct ncrx_slot *slot, struct ncrx *ncrx) { /* no msg received? 
*/ if (!ncrx->head_seq) return; /* "ncrx" */ if (!ncrx->resp_len) { ncrx->acked_seq = tail_seq(ncrx) - 1; ncrx->acked_at = ncrx->now_mono; ncrx->resp_len = snprintf(ncrx->resp_buf, NCRX_PKT_MAX, "ncrx%"PRIu64, ncrx->acked_seq); } /* " ..." truncated to NCRX_PKT_MAX */ if (slot) { int idx = slot - ncrx->slots; int len; len = snprintf(ncrx->resp_buf + ncrx->resp_len, NCRX_PKT_MAX - ncrx->resp_len, " %"PRIu64, ncrx->head_seq - slot_dist(idx, ncrx)); if (ncrx->resp_len + len <= NCRX_PKT_MAX) { ncrx->resp_len += len; ncrx->resp_buf[ncrx->resp_len] = '\0'; } } } int ncrx_process(const char *payload, uint64_t now_mono, uint64_t now_real, struct ncrx *ncrx) { struct ncrx_slot *slot, *tmp_slot; struct ncrx_msg *msg; uint64_t old_head_seq = ncrx->head_seq; int dist_retx, ret = 0; if (now_mono < ncrx->now_mono) fprintf(stderr, "ncrx: time regressed %"PRIu64"->%"PRIu64"\n", ncrx->now_mono, now_mono); ncrx->now_mono = now_mono; ncrx->resp_len = 0; /* * If fully acked, keep last ack timestamp current so that new * messages arriving doesn't trigger ack timeout immediately. */ if (ncrx->acked_seq == tail_seq(ncrx) - 1) ncrx->acked_at = now_mono; /* parse and queue @payload */ if (payload) ret = ncrx_queue_payload(payload, ncrx, now_real); /* retire complete & timed-out msgs from tail */ while (ncrx->tail != ncrx->head) { slot = &ncrx->slots[ncrx->tail]; if ((!slot->msg || !list_empty(&slot->hole_node)) && slot->timestamp + ncrx->p.msg_timeout > now_mono) break; retire_tail(ncrx); } /* retire timed-out oos msgs */ while ((msg = msg_list_peek(&ncrx->oos_list))) { if (msg->rx_at_mono + ncrx->p.oos_timeout > now_mono) break; msg->oos = 1; msg_list_del(msg, &ncrx->oos_list); msg_list_append(msg, &ncrx->retired_list); } /* if enabled, ack pending and timeout expired? */ if (ncrx->p.ack_intv && ncrx->acked_seq != tail_seq(ncrx) - 1 && ncrx->acked_at + ncrx->p.ack_intv < now_mono) ncrx_build_resp(NULL, ncrx); /* head passed one or more re-transmission boundaries? 
*/ dist_retx = old_head_seq / ncrx->p.retx_stride != ncrx->head_seq / ncrx->p.retx_stride; hole_list_for_each(slot, tmp_slot, &ncrx->hole_list) { int retx = 0; /* * If so, request re-tx of holes further away than stride. * This ensures that a missing seq is requested at least * certain number of times regardless of incoming rate. */ if (dist_retx && slot_dist(slot - ncrx->slots, ncrx) > ncrx->p.retx_stride) retx = 1; /* request re-tx every retx_intv */ if (now_mono - max(slot->timestamp, slot->retx_timestamp) >= (unsigned)ncrx->p.retx_intv) { slot->retx_timestamp = now_mono; retx = 1; } if (retx) ncrx_build_resp(slot, ncrx); } return ret; } const char *ncrx_response(struct ncrx *ncrx, int *lenp) { if (lenp) *lenp = ncrx->resp_len; if (ncrx->resp_len) return ncrx->resp_buf; return NULL; } /* parse out the dictionary in a complete message, if it exists */ static void terminate_msg_and_dict(struct ncrx_msg *msg) { msg->dict = strchr(msg->text, '\n'); if (msg->dict) { int len = msg->text_len; msg->text_len = msg->dict - msg->text; msg->text[msg->text_len] = '\0'; msg->dict_len = len - msg->text_len - 1; msg->dict++; } } struct ncrx_msg *ncrx_next_msg(struct ncrx *ncrx) { struct ncrx_msg *msg = msg_list_pop(&ncrx->retired_list); if (msg) terminate_msg_and_dict(msg); return msg; } uint64_t ncrx_invoke_process_at(struct ncrx *ncrx) { uint64_t when = UINT64_MAX; struct ncrx_msg *msg; /* ack enabled and pending? */ if (ncrx->p.ack_intv && ncrx->head_seq && ncrx->acked_seq != tail_seq(ncrx) - 1) when = min(when, ncrx->acked_at + ncrx->p.ack_intv); /* * Holes to request for retransmission? msg_timeout is the same * condition but way longer. Checking on retx_intv is enough. 
*/ if (!list_empty(&ncrx->hole_list)) when = min(when, ncrx->now_mono + ncrx->p.retx_intv); /* oos timeout */ if ((msg = msg_list_peek(&ncrx->oos_list))) when = min(when, msg->rx_at_mono + ncrx->p.oos_timeout); /* min 10ms intv to avoid busy loop in case something goes bonkers */ return max(when, ncrx->now_mono + 10); } struct ncrx *ncrx_create(const struct ncrx_param *param) { const struct ncrx_param *dfl = &ncrx_dfl_param; struct ncrx_param *p; struct ncrx *ncrx; int i; ncrx = calloc(1, sizeof(*ncrx)); if (!ncrx) return NULL; p = &ncrx->p; if (param) { p->nr_slots = param->nr_slots ?: dfl->nr_slots; p->ack_intv = param->ack_intv ?: dfl->ack_intv; p->retx_intv = param->retx_intv ?: dfl->retx_intv; p->retx_stride = param->retx_stride ?: dfl->retx_stride; p->msg_timeout = param->msg_timeout ?: dfl->msg_timeout; p->oos_thr = param->oos_thr ?: dfl->oos_thr; p->oos_intv = param->oos_intv ?: dfl->oos_intv; p->oos_timeout = param->oos_timeout ?: dfl->oos_timeout; } else { *p = *dfl; } ncrx->acked_seq = UINT64_MAX; init_list(&ncrx->hole_list); init_list(&ncrx->oos_list.head); init_list(&ncrx->retired_list.head); ncrx->slots = calloc(ncrx->p.nr_slots, sizeof(ncrx->slots[0])); if (!ncrx->slots) { free(ncrx); return NULL; } for (i = 0; i < ncrx->p.nr_slots; i++) init_list(&ncrx->slots[i].hole_node); return ncrx; } void ncrx_destroy(struct ncrx *ncrx) { struct ncrx_msg *msg; int i; for (i = 0; i < ncrx->p.nr_slots; i++) free(ncrx->slots[i].msg); while ((msg = msg_list_pop(&ncrx->oos_list))) free(msg); while ((msg = msg_list_pop(&ncrx->retired_list))) free(msg); free(ncrx->slots); free(ncrx); } netconsd-0.4.1/ncrx/ncrx-struct.h000066400000000000000000000036731457170767400170030ustar00rootroot00000000000000/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
/*
 * One log message as handed out by ncrx_next_msg().
 *
 * The "public" fields are meant for the consumer of the message; the
 * "private" ones are bookkeeping used by the library while the message is
 * being received and reordered.
 */
struct ncrx_msg {
	/* public fields */
	uint64_t		seq;		/* printk sequence number */
	uint64_t		ts_usec;	/* printk timestamp in usec */
	char			*text;		/* message body */
	/*
	 * Optional dictionary.  ncrx_next_msg() splits the message at the
	 * first '\n' in @text: what follows becomes the dictionary, and
	 * @dict is NULL when there is no '\n'.
	 */
	char			*dict;
	int			text_len;	/* message body length */
	int			dict_len;	/* dictionary length */

	uint8_t			facility;	/* log facility */
	uint8_t			level;		/* printk level */
	unsigned		cont_start:1;	/* first of continued msgs */
	unsigned		cont:1;		/* continuation of prev msg */
	/* sequence out-of-order; set when the message leaves the oos list */
	unsigned		oos:1;
	unsigned		seq_reset:1;	/* sequence reset */

	/* private fields */
	struct ncrx_list	node;		/* linkage on a message list */
	/* monotonic rx time in msec; compared against oos_timeout */
	uint64_t		rx_at_mono;
	uint64_t		rx_at_real;	/* real rx time in msec */
	int			ncfrag_off;	/* netconsole frag offset */
	int			ncfrag_len;	/* netconsole frag len */
	int			ncfrag_left;	/* number of missing bytes */

	/* kernel release version */
	char			version[NCRX_KVERSION_MAX_LEN];
	unsigned		emg:1;		/* emergency transmission */
	/*
	 * Flexible array member; presumably the storage @text points into
	 * -- TODO confirm against the allocation site.
	 */
	char			buf[];
};
* * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include #include #include #include "ncrx.h" union sockaddr_in46 { struct sockaddr addr; struct sockaddr_in6 in6; struct sockaddr_in in4; }; int main(int argc, char **argv) { char buf[NCRX_LINE_MAX + 1]; struct ncrx_param param = { .ack_intv = 1000 }; struct ncrx *ncrx; struct sockaddr_in6 laddr = { }; uint64_t next_seq = 0, next_at = UINT64_MAX, now; int prev_cont = 0; int fd; if (argc != 2) { fprintf(stderr, "Usage: ncrx PORT\n"); return 1; } fd = socket(AF_INET6, SOCK_DGRAM, 0); if (fd < 0) { perror("socket"); return 1; } laddr.sin6_family = AF_INET6; laddr.sin6_addr = in6addr_any; laddr.sin6_port = htons(atoi(argv[1])); if (bind(fd, (struct sockaddr *)&laddr, sizeof(laddr)) < 0) { perror("bind"); return 1; } ncrx = ncrx_create(¶m); if (!ncrx) { perror("ncrx_create"); return 1; } while (1) { struct pollfd pfd = { .fd = fd, .events = POLLIN }; union sockaddr_in46 raddr; struct ncrx_msg *msg; struct timespec ts; socklen_t raddr_len = sizeof(raddr); char *payload = NULL; const char *resp; int timeout; int len; /* determine sleep interval and poll */ timeout = -1; if (next_at != UINT64_MAX) { timeout = 0; if (next_at > now) timeout = next_at - now; } if (poll(&pfd, 1, timeout) < 0) { perror("poll"); return 1; } /* receive message */ len = recvfrom(fd, buf, sizeof(buf) - 1, MSG_DONTWAIT, (struct sockaddr *)&raddr, &raddr_len); payload = NULL; if (len >= 0) { buf[len] = '\0'; payload = buf; } else if (errno != EAGAIN) { perror("recv"); return 1; } /* determine the current time */ if (clock_gettime(CLOCK_MONOTONIC, &ts)) { perror("clock_gettime"); return 1; } now = ts.tv_sec * 1000 + ts.tv_nsec / 1000000; /* process the payload and perform rx operations */ if (ncrx_process(payload, now, 0, ncrx) && errno != ENOENT) { if (errno == EINVAL) { while (len && isspace(payload[len - 1])) 
payload[--len] = '\0'; printf("[%12s] %s\n", "INVAL", payload); } else { perror("ncrx_process"); } } resp = ncrx_response(ncrx, &len); if (resp && sendto(fd, resp, len, 0, (struct sockaddr *)&raddr, raddr_len) < 0) perror("sendto"); while ((msg = ncrx_next_msg(ncrx))) { const char *pnl = prev_cont ? "\n" : ""; if (msg->oos) { printf("%s[%12s] %s\n", pnl, "OOS", msg->text); prev_cont = 0; continue; } if (msg->seq_reset) { printf("%s[%12s] seq=%"PRIu64"\n", pnl, "SEQ RESET", msg->seq); next_seq = msg->seq; } if (msg->seq != next_seq) { printf("%s[%12s] %"PRIu64" messages skipped\n", pnl, "SEQ SKIPPED", msg->seq - next_seq); } next_seq = msg->seq + 1; if (!msg->cont || !prev_cont) printf("%s[%5"PRIu64".%06"PRIu64"] ", pnl, msg->ts_usec / 1000000, msg->ts_usec % 1000000); printf("%s", msg->text); prev_cont = msg->cont_start || msg->cont; if (!prev_cont) printf("\n"); } next_at = ncrx_invoke_process_at(ncrx); } return 0; } netconsd-0.4.1/ncrx/ncrx.h000066400000000000000000000116401457170767400154520ustar00rootroot00000000000000/* * ncrx - extended netconsole receiver library * * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #ifndef __NETCONSOLE_NCRX__ #define __NETCONSOLE_NCRX__ #include #define NCRX_LINE_MAX 8192 /* max payload len for responses, this is what netconsole uses on tx side */ #define NCRX_PKT_MAX 1000 #include "ncrx-struct.h" /* * ncrx parameters. Specify NULL to use defaults for all. Specify 0 to use * default for individual parameters. All time periods are in millisecs. * * nr_slots * The number of reorder slots. This bounds the maximum memory which * may be consumed by the ncrx instance. Lowering this number * increases the chance of the ordering window passing by a missing * message before it can be obtained leading to missed messages. * * ack_intv * A received message is acked after this period. 
/*
 * Tunables for one ncrx instance.  A field left at 0 selects the matching
 * NCRX_DFL_* default; all time periods are in millisecs.
 */
struct ncrx_param {
	int			nr_slots;	/* number of reorder slots */

	int			ack_intv;	/* delay before acking; default 0 disables acks */
	int			retx_intv;	/* period between retransmission requests */
	int			retx_stride;	/* slots a hole is pushed back before re-requesting */
	int			msg_timeout;	/* after this a missing message is skipped */

	int			oos_thr;	/* out-of-order count (of last 32) that resets the stream */
	int			oos_intv;	/* min age of last in-sequence msg to count as oos */
	int			oos_timeout;	/* unresolved oos message is output after this */
};
 *
 *	@now_real is an optional timestamp which will be stored at rx_at_real
 *	in the resulting ncrx_msg struct.  The library does not use this value
 *	at all, so it can be zero.
 *
 *	Returns 0 on success.  1 on failure with errno set.  EINVAL
 *	indicates that @payload is not a valid extended netconsole message.
 *
 * ncrx_response()
 *	The response to send to log source.  If the user calls this
 *	function after each ncrx_process() invocation and sends back the
 *	output, re- and emergency transmissions are activated, increasing
 *	the reliability especially if the network is flaky.  If not, ncrx
 *	will passively reorder and assemble messages.
 *
 *	Returns pointer to '\0' terminated response string or NULL if
 *	there's nothing to send back.  If @lenp is not NULL, *@lenp is set
 *	to the length of the response string.
 *
 * ncrx_next_msg()
 *	Fetches the next completed message.  Call repeatedly until NULL is
 *	returned after each ncrx_process() invocation.  Each message should
 *	be free()'d by the user after consumption.
 *
 * ncrx_invoke_process_at()
 *	Message processing is timing dependent and ncrx often needs to take
 *	actions after a certain time period even when there haven't been any
 *	new packets.  This function indicates when the caller should invoke
 *	ncrx_process() at the latest.
 *
 *	The returned time is an absolute deadline on the same clock as the
 *	@now_mono previously provided to ncrx_process().  e.g. if
 *	ncrx_process() needs to be invoked 4 seconds after the last
 *	invocation where @now_mono was 60000, this function will return
 *	64000.  Returns UINT64_MAX if there's no pending timing dependent
 *	operation.
 *
 * See ncrx/ncrx.c for a simple example.
*/ int ncrx_process(const char *payload, uint64_t now_mono, uint64_t now_real, struct ncrx *ncrx); const char *ncrx_response(struct ncrx *ncrx, int *lenp); struct ncrx_msg *ncrx_next_msg(struct ncrx *ncrx); uint64_t ncrx_invoke_process_at(struct ncrx *ncrx); #endif /* __NETCONSOLE_NCRX__ */ netconsd-0.4.1/ncrx/nctx.c000066400000000000000000000265341457170767400154570ustar00rootroot00000000000000/* * nctx - extended netconsole sender * * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ncrx.h" /* in msecs */ #define ACK_TIMEOUT 10000 #define EMG_TX_MAX_INTV 1000 #define EMG_TX_MIN_INTV 100 union sockaddr_in46 { struct sockaddr addr; struct sockaddr_in6 in6; struct sockaddr_in in4; }; struct kmsg_slot { char *msg; uint64_t ts; }; struct kmsg_ring { int head; int tail; int nr_slots; uint64_t head_seq; union sockaddr_in46 raddr; int raddr_len; int emg_tx_intv; uint64_t emg_tx_seq; uint64_t emg_tx_ts; struct kmsg_slot *slots; }; /* relative time in msecs */ static uint64_t current_msec(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts)) { perror("clock_gettime"); exit(1); } return ts.tv_sec * 1000 + ts.tv_nsec / 1000000; } static int kmsg_ring_init(struct kmsg_ring *ring, int nr_slots) { memset(ring, 0, sizeof(*ring)); ring->slots = malloc(sizeof(ring->slots[0]) * nr_slots); if (!ring->slots) return -1; ring->nr_slots = nr_slots; return 0; } /* advance @ring's head by one, if head catches up with tail, clip it */ static void kmsg_ring_advance(struct kmsg_ring *ring) { struct kmsg_slot *slot; ring->head_seq++; ring->head = (ring->head + 1) % ring->nr_slots; slot = &ring->slots[ring->head]; if (ring->tail == ring->head) { free(slot->msg); memset(slot, 0, sizeof(*slot)); 
ring->tail = (ring->tail + 1) % ring->nr_slots; } } /* fill @ring with kmsgs from @devkmsg, returns 0 on success, -1 on failure */ static int kmsg_ring_fill(struct kmsg_ring *ring, int devkmsg) { char buf[NCRX_LINE_MAX]; struct kmsg_slot *slot; int level; uint64_t seq; ssize_t len; next_line: do { len = read(devkmsg, buf, sizeof(buf) - 1); /* * EPIPE indicates skipped messages. kmsgs are always * stored according to their sequence numbers, so we don't * need to do anything special on EPIPE. Keep reading. */ } while (len < 0 && errno == EPIPE); if (len < 0) { if (errno == EAGAIN) return 0; return -1; } /* read seq and see if it makes sense */ buf[len] = '\0'; if (sscanf(buf, "%d,%"SCNu64",", &level, &seq) != 2 || seq < ring->head_seq) { fprintf(stderr, "Warning: malformed kmsg \"%s\"\n", buf); goto next_line; } /* wind ring till head is at the right slot and store */ while (ring->head_seq < seq) kmsg_ring_advance(ring); slot = &ring->slots[ring->head]; slot->msg = strdup(buf); if (!slot->msg) return -1; slot->ts = current_msec(); kmsg_ring_advance(ring); goto next_line; } /* sequence number of the oldest occupied slot in @ring */ static uint64_t kmsg_ring_tail_seq(struct kmsg_ring *ring) { int nr; nr = ring->head - ring->tail; if (nr < 0) nr += ring->nr_slots; return ring->head_seq - nr; } /* peek kmsg matching @seq, NULL if not found */ static char *kmsg_ring_peek(struct kmsg_ring *ring, uint64_t seq) { int idx; if (seq < kmsg_ring_tail_seq(ring) || seq >= ring->head_seq) return NULL; idx = ring->head - (int)(ring->head_seq - seq); if (idx < 0) idx += ring->nr_slots; return ring->slots[idx].msg; } /* free slots upto @upto_seq, tail_seq is @upto_seq + 1 afterwards */ static void kmsg_ring_consume(struct kmsg_ring *ring, uint64_t upto_seq) { uint64_t tail_seq = kmsg_ring_tail_seq(ring); int tail = ring->tail; if (!ring->head_seq || upto_seq < tail_seq) return; if (upto_seq >= ring->head_seq) upto_seq = ring->head_seq - 1; while (tail_seq <= upto_seq) { struct 
kmsg_slot *slot = &ring->slots[ring->head]; free(slot->msg); memset(slot, 0, sizeof(*slot)); tail_seq++; tail = (tail + 1) % ring->nr_slots; /* made progress, reset emergency tx */ ring->emg_tx_intv = 0; } ring->tail = tail; } /* * Send @msg to @addr via @sock. If @msg is too long, split into * NCRX_PKT_MAX byte chunks with ncfrag header added. If @is_emg_tx is * set, add ncemg header. */ static void send_kmsg(int sock, char *msg, int is_emg_tx, struct sockaddr *addr, int addr_len) { char buf[NCRX_PKT_MAX + 1]; const int max_extra_len = sizeof(",ncemg=1,ncfrag=0000/0000"); const char *header, *body; int msg_len = strlen(msg); int header_len = msg_len, body_len = 0; int chunk_len, nr_chunks, i; if (!is_emg_tx && msg_len <= NCRX_PKT_MAX) { sendto(sock, msg, msg_len, 0, addr, addr_len); return; } /* need to insert extra header fields, detect header and body */ header = msg; body = memchr(msg, ';', msg_len); if (body) { header_len = body - header; body_len = msg_len - header_len - 1; body++; } chunk_len = NCRX_PKT_MAX - header_len - max_extra_len; if (chunk_len <= 0) { fprintf(stderr, "Error: invalid chunk_len %d in send_kmsg()\n", chunk_len); return; } /* * Transfer possibly multiple chunks with extra header fields. * * For emergency transfers due to missing acks, add "emg=1". * * If @msg needs to be split to fit NCRX_PKT_MAX, add * "ncfrag=/" to identify each chunk. 
*/ memcpy(buf, header, header_len); nr_chunks = (body_len + chunk_len - 1) / chunk_len; for (i = 0; i < nr_chunks; i++) { int offset = i * chunk_len; int this_header = header_len; int this_chunk; this_chunk = body_len - offset; if (this_chunk > chunk_len) this_chunk = chunk_len; if (is_emg_tx && this_header < sizeof(buf)) this_header += snprintf(buf + this_header, sizeof(buf) - this_header, ",ncemg=1"); if (nr_chunks > 1 && this_header < sizeof(buf)) this_header += snprintf(buf + this_header, sizeof(buf) - this_header, ",ncfrag=%d/%d", offset, body_len); if (this_header < sizeof(buf)) this_header += snprintf(buf + this_header, sizeof(buf) - this_header, ";"); if (this_header + chunk_len > NCRX_PKT_MAX) { fprintf(stderr, "Error: this_header %d is too large for chunk_len %d in send_kmsg()\n", this_header, chunk_len); return; } memcpy(buf + this_header, body, this_chunk); sendto(sock, buf, this_header + this_chunk, 0, addr, addr_len); body += this_chunk; } } /* rx and handle response packets from @sock, returns 0 on success, -1 on err */ static int kmsg_ring_process_resps(struct kmsg_ring *ring, int sock) { char rx_buf[NCRX_PKT_MAX + 1]; union sockaddr_in46 raddr; struct iovec iov = { .iov_base = rx_buf, .iov_len = NCRX_PKT_MAX }; struct msghdr msgh = { .msg_name = &raddr.addr, .msg_iov = &iov, .msg_iovlen = 1 }; ssize_t len; char *pos, *tok; uint64_t seq; next_packet: msgh.msg_namelen = sizeof(raddr); len = recvmsg(sock, &msgh, MSG_DONTWAIT); if (len < 0) { if (errno == EAGAIN) return 0; return -1; } rx_buf[len] = '\0'; pos = rx_buf; tok = strsep(&pos, " "); /* "ncrx" header */ if (strncmp(tok, "ncrx", 4)) { char addr_str[INET6_ADDRSTRLEN]; if (raddr.addr.sa_family == AF_INET6) inet_ntop(AF_INET6, &raddr.in6.sin6_addr, addr_str, sizeof(addr_str)); else inet_ntop(AF_INET, &raddr.in4.sin_addr, addr_str, sizeof(addr_str)); fprintf(stderr, "Warning: malformed packet from [%s]:%u\n", addr_str, ntohs(raddr.in4.sin_port)); goto next_packet; } tok += 4; /* */ if (sscanf(tok, 
"%"SCNu64, &seq)) kmsg_ring_consume(ring, seq); /* ... */ while ((tok = strsep(&pos, " "))) { if (sscanf(tok, "%"SCNu64, &seq)) { char *msg = kmsg_ring_peek(ring, seq); if (msg) send_kmsg(sock, msg, 0, &raddr.addr, msgh.msg_namelen); } } /* stash remote address for emergency tx */ ring->raddr = raddr; ring->raddr_len = msgh.msg_namelen; goto next_packet; } /* * Perform emergency tx if necessary. Must be called after @ring is filled * and responses are processed. Returns the duration in msecs after which * this function should be invoked again. If -1, timeout isn't necessary. */ static int kmsg_ring_emg_tx(struct kmsg_ring *ring, int sock) { struct kmsg_slot *slot = &ring->slots[ring->tail]; uint64_t target, now; uint64_t tail_seq; char *msg; /* if @ring is empty or remote site is not established, nothing to do */ if (ring->head == ring->tail || !ring->raddr_len) { ring->emg_tx_intv = 0; return -1; } /* calculate the next deadline, if in the future, return the diff */ if (!ring->emg_tx_intv) target = slot->ts + ACK_TIMEOUT; else target = ring->emg_tx_ts + ring->emg_tx_intv; now = current_msec(); if (target > now) return target - now; tail_seq = kmsg_ring_tail_seq(ring); if (!ring->emg_tx_intv) { /* new emg tx session */ ring->emg_tx_intv = EMG_TX_MIN_INTV; ring->emg_tx_seq = tail_seq; } else if (ring->emg_tx_seq < ring->head_seq) { /* in the middle of emg tx session */ ring->emg_tx_seq++; if (ring->emg_tx_seq < tail_seq) ring->emg_tx_seq = tail_seq; } else { /* finished one session, increase intv and repeat */ ring->emg_tx_intv *= 2; if (ring->emg_tx_intv < EMG_TX_MAX_INTV) ring->emg_tx_intv = EMG_TX_MAX_INTV; ring->emg_tx_seq = tail_seq; } msg = kmsg_ring_peek(ring, ring->emg_tx_seq); if (msg) send_kmsg(sock, msg, 1, &ring->raddr.addr, ring->raddr_len); ring->emg_tx_ts = now; return ring->emg_tx_intv; } static void usage_err(const char *err) { if (err) fprintf(stderr, "Error: %s\n", err); fprintf(stderr, "Usage: nctx [-n nr_slots] [-k devkmsg_path] ip port\n"); 
exit(1); } int main(int argc, char **argv) { union sockaddr_in46 laddr = { }; struct pollfd pfds[2] = { }; struct kmsg_ring kmsg_ring; const char *devkmsg_path = "/dev/kmsg"; int nr_slots = NCRX_DFL_NR_SLOTS; int sleep_dur = -1; int opt, port, sock, devkmsg; socklen_t addrlen; while ((opt = getopt(argc, argv, "n:k:h?")) != -1) { switch (opt) { case 'n': nr_slots = atoi(optarg); if (nr_slots <= 0) usage_err("nr_slots must be a positive number"); break; case 'k': devkmsg_path = optarg; break; default: usage_err(NULL); } } if (optind + 2 != argc) usage_err(NULL); if (inet_pton(AF_INET6, argv[optind], &laddr.in6.sin6_addr)) { laddr.addr.sa_family = AF_INET6; addrlen = sizeof(laddr.in6); } else if (inet_pton(AF_INET, argv[optind], &laddr.in4.sin_addr)) { laddr.addr.sa_family = AF_INET; addrlen = sizeof(laddr.in4); } else { usage_err("invalid IP address"); } port = atoi(argv[optind + 1]); if (port <= 0 || port > 65535) usage_err("invalid port number"); laddr.in4.sin_port = htons(port); sock = socket(laddr.addr.sa_family, SOCK_DGRAM, 0); if (sock < 0) { perror("socket"); return 1; } if (bind(sock, &laddr.addr, addrlen)) { perror("bind"); return 1; } devkmsg = open(devkmsg_path, O_RDONLY | O_NONBLOCK); if (devkmsg < 0) { perror("open"); return 1; } if (kmsg_ring_init(&kmsg_ring, nr_slots)) { perror("kmsg_ring_init"); return 1; } pfds[0].events = POLLIN; pfds[1].events = POLLIN; pfds[0].fd = devkmsg; pfds[1].fd = sock; while (poll(pfds, 2, sleep_dur) >= 0) { if (kmsg_ring_fill(&kmsg_ring, devkmsg)) { perror("kmsg_ring_fill"); return 1; } if (kmsg_ring_process_resps(&kmsg_ring, sock)) { perror("kmsg_ring_process_resps"); return 1; } sleep_dur = kmsg_ring_emg_tx(&kmsg_ring, sock); } perror("poll"); return 1; } netconsd-0.4.1/ncrx/netcons-gen.py000077500000000000000000000073561457170767400171350ustar00rootroot00000000000000#!/usr/bin/env python3 # # Copyright (c) Meta Platforms, Inc. and affiliates. 
def make_dictionary_string(msg):
    """Serialise ``msg`` as ``KEY=VALUE`` pairs separated by NUL characters.

    Pairs follow the mapping's iteration order and no trailing separator is
    added, so an empty mapping yields an empty string.
    """
    pairs = []
    for key, value in msg.items():
        pairs.append(f"{key}={value}")
    return "\0".join(pairs)
def parse_args():
    """Parse the command line and return the resulting namespace.

    Three independent boolean switches are recognised, all off by default:
    ``--skip``, ``--reset`` and ``--cont``.
    """
    switches = (
        ("--skip", "Randomly skip sequence numbers"),
        ("--reset", "Randomly reset the sequence to 0 again"),
        ("--cont", "Randomly insert LOG_CONT messages"),
    )

    parser = argparse.ArgumentParser(description=__doc__)
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)
    return parser.parse_args()
*/ #include #include #include #include #include #include #include #include #include #include "include/common.h" #include "include/msgbuf-struct.h" #include "include/output.h" static void *output_dlhandles[MAXOUTS]; static void (*outputs[MAXOUTS])(int, struct in6_addr *, struct msg_buf *, struct ncrx_msg *); static int nr_outputs; int register_output_module(char *path, int nr_workers) { void *dl, *dlsym_addr; int (*mod_init)(int); int ret; if (nr_outputs == MAXOUTS) { warn("Too many output modules!\n"); return -1; } log("Loading module '%s'\n", path); dl = dlopen(path, RTLD_NOW|RTLD_LOCAL); if (!dl) { warn("Can't open '%s': %s", path, dlerror()); return -1; } dlsym_addr = dlsym(dl, "netconsd_output_handler"); if (!dlsym_addr) { warn("Can't find handler sym in '%s': %s", path, dlerror()); goto err_close; } mod_init = dlsym(dl, "netconsd_output_init"); if (mod_init) { log("Calling mod_init() for '%s'\n", path); ret = mod_init(nr_workers); if (ret) { warn("mod_init() for '%s' failed: %d\n", path, ret); goto err_close; } } log("Module '%s' registered (#%d@%p)\n", path, nr_outputs, dlsym_addr); output_dlhandles[nr_outputs] = dl; outputs[nr_outputs] = dlsym_addr; nr_outputs++; return 0; err_close: dlclose(dl); return -1; } void destroy_output_modules(void) { int i, ret; void (*mod_exit)(void); char path[PATH_MAX] = {0}; for (i = 0; i < nr_outputs; i++) { if (dlinfo(output_dlhandles[i], RTLD_DI_ORIGIN, path)) strncpy(path, dlerror(), PATH_MAX - 1); mod_exit = dlsym(output_dlhandles[i], "netconsd_output_exit"); if (mod_exit) { log("Calling mod_exit() for '%s'\n", path); mod_exit(); } log("Unloading module '%s' (#%d@%p)\n", path, i, outputs[i]); ret = dlclose(output_dlhandles[i]); if (ret) warn("dlclose() failed: %s\n", dlerror()); } } void execute_output_pipeline(int thread_nr, struct in6_addr *src, struct msg_buf *buf, struct ncrx_msg *msg) { int i; for (i = 0; i < nr_outputs; i++) outputs[i](thread_nr, src, buf, msg); } 
netconsd-0.4.1/rust/000077500000000000000000000000001457170767400143505ustar00rootroot00000000000000netconsd-0.4.1/rust/blaster/000077500000000000000000000000001457170767400160045ustar00rootroot00000000000000netconsd-0.4.1/rust/blaster/Cargo.toml000066400000000000000000000004411457170767400177330ustar00rootroot00000000000000[package] name = "netconsblaster" version = "0.1.0" edition = "2021" [[bin]] name = "netconsblaster" path = "src/main.rs" [lib] name = "libblaster" path = "src/lib.rs" [dependencies] libc = "0.2.40" byteorder = "1.4.3" clap = {version = "3.2.22", features = ["derive"] } rand = "0.8.5" netconsd-0.4.1/rust/blaster/src/000077500000000000000000000000001457170767400165735ustar00rootroot00000000000000netconsd-0.4.1/rust/blaster/src/lib.rs000066400000000000000000000122051457170767400177070ustar00rootroot00000000000000/* * Utility functions to send fake netconsole messages, can be used to test * netconsd and its modules. * * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
/// Adds the bytes of `b`, read as big-endian 16-bit words, to the running
/// one's-complement checksum in `sum`.
///
/// After every word the carry is folded back into the low 16 bits, so on
/// return `*sum` fits in 16 bits no matter how many words were added. A
/// trailing odd byte is treated as the high byte of a word whose low byte
/// is zero.
fn sum_bytes_for_checksum(sum: &mut u32, b: &[u8]) {
    for word in b.chunks(2) {
        let hi = u32::from(word[0]) << 8;
        // chunks() yields a single-element slice for an odd trailing byte.
        let lo = word.get(1).copied().map_or(0, u32::from);
        *sum += hi + lo;

        // End-around carry: keep folding until nothing is left above bit 15.
        while *sum > 0xffff {
            *sum = (*sum & 0xffff) + (*sum >> 16);
        }
    }
}
sum_bytes_for_checksum(&mut sum, &packet.l3.src); sum_bytes_for_checksum(&mut sum, &packet.l3.dst); sum_u16_for_checksum(&mut sum, (packet.payload.len() + 8) as u16); sum_u16_for_checksum(&mut sum, IPPROTO_UDP as u16); sum_u16_for_checksum(&mut sum, packet.l4.src_port.to_be()); sum_u16_for_checksum(&mut sum, packet.l4.dst_port.to_be()); sum_u16_for_checksum(&mut sum, packet.l4.len.to_be()); let mut payload_buf = Cursor::new(packet.payload); while let Ok(value) = payload_buf.read_u16::() { sum_u16_for_checksum(&mut sum, value); } if sum == 0 { sum = 65535; } !(sum as u16).to_be() } fn send_packet(fd: c_int, packet: &NetconsPacket, sockaddr: &sockaddr_in6) { unsafe { let pkt_ptr = (packet as *const NetconsPacket) as *const c_void; let pkt_size = size_of::(); // libc::sendto requires a sockaddr pointer, but here we must use a sockaddr_in6, // this might be a bad implementation of rust libc. let sockaddr_ptr = &*((sockaddr as *const sockaddr_in6) as *const sockaddr); let sockaddr_size: u32 = size_of::() .try_into() .expect("Could not convert size of sockaddr_in6 to u32."); let _ = sendto(fd, pkt_ptr, pkt_size, 0, sockaddr_ptr, sockaddr_size); }; } fn make_sockaddr_in6(dst_ip: [u8; 16]) -> sockaddr_in6 { sockaddr_in6 { sin6_family: AF_INET6 as u16, sin6_port: 0, sin6_flowinfo: 0, sin6_addr: in6_addr { s6_addr: dst_ip }, sin6_scope_id: 0, } } pub fn blast_worker(config: WorkerConfig) { let fd = get_raw_socket(); let mut dst_ip = [0u8; 16]; dst_ip[15] = 1; let addr = make_sockaddr_in6(dst_ip); let mut packet = NetconsPacket::new(dst_ip, config.dst_port); packet.l3.src[15] = config.id; for i in 0u64..config.packets_count { let msg = if config.extended_msg { format!("{},{},{},-;hello packet {} {}\n", 4, i, i, config.id, i) } else { format!("[{}] hello packet {} {}\n", i, config.id, i) }; packet.set_payload(&msg); for j in 0..config.sender_addr_rnd_bytes { packet.l3.src[j] = rand::random(); } packet.update_checksum(); send_packet(fd, &packet, &addr); if let Some(t) = 
config.sleep_duration { sleep(t); } } } netconsd-0.4.1/rust/blaster/src/main.rs000066400000000000000000000036041457170767400200700ustar00rootroot00000000000000/* * Simple utility that sends netconsole messages to localhost. * * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ use std::thread; use std::time::Duration; use std::time::Instant; use clap::Parser; use libblaster::blast_worker; use libblaster::WorkerConfig; #[derive(Parser)] struct CliArgs { #[clap(short, long, default_value_t = 1)] threads: u16, #[clap(short, long, default_value_t = std::u64::MAX)] packets: u64, #[clap(short = 'u', long, default_value_t = 6666u16)] port: u16, #[clap(short, long)] sleep_time_nano: Option, #[clap(short = 'r', long, default_value_t = 0)] sender_ip_rnd_bytes: usize, } fn format_duration(duration: &Duration) -> String { let mins = duration.as_secs() / 60; let secs = duration.as_secs() % 60; let ms = duration.as_millis() % 1000; format!("{:02}:{:02}.{:03}", mins, secs, ms) } fn main() { let args = CliArgs::parse(); let mut workers: Vec> = Vec::new(); let sleep_duration = args.sleep_time_nano.map(Duration::from_nanos); let start_time = Instant::now(); for i in 0..args.threads { let config = WorkerConfig { id: i as u8, packets_count: args.packets, dst_port: args.port, sleep_duration, extended_msg: true, sender_addr_rnd_bytes: args.sender_ip_rnd_bytes, }; workers.push(thread::spawn(move || { blast_worker(config); })) } for w in workers { let _ = w.join(); } let packets_sent = args.packets * args.threads as u64; println!( "Sent {} packets with {} threads in {}", packets_sent, args.threads, format_duration(&start_time.elapsed()) ); } 
netconsd-0.4.1/rust/example_module/000077500000000000000000000000001457170767400173505ustar00rootroot00000000000000netconsd-0.4.1/rust/example_module/.gitignore000066400000000000000000000000231457170767400213330ustar00rootroot00000000000000target/ Cargo.lock netconsd-0.4.1/rust/example_module/Cargo.toml000066400000000000000000000003741457170767400213040ustar00rootroot00000000000000[package] name = "example_module" version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] netconsd_module = {path = "../module"} [lib] crate-type = ["dylib"] netconsd-0.4.1/rust/example_module/src/000077500000000000000000000000001457170767400201375ustar00rootroot00000000000000netconsd-0.4.1/rust/example_module/src/lib.rs000066400000000000000000000023461457170767400212600ustar00rootroot00000000000000/* * A minimal example of a Rust netconsd module. * * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ use netconsd_module::c_int; use netconsd_module::format_in6_addr_ptr; use netconsd_module::in6_addr; use netconsd_module::MsgBuf; use netconsd_module::NcrxMsg; fn fmt_ptr(ptr: *const T) -> String { match unsafe { ptr.as_ref() } { None => "NULL".to_owned(), Some(x) => format!("{}", x), } } #[no_mangle] pub extern "C" fn netconsd_output_init(nr_workers: c_int) -> c_int { println!( "Rust example module init! 
netconsd will use {} workers", nr_workers ); 0 } #[no_mangle] pub extern "C" fn netconsd_output_handler( t: c_int, in6_addr: *const in6_addr, buf: *const MsgBuf, msg: *const NcrxMsg, ) -> i32 { println!( "Received message from {} on thread {}", format_in6_addr_ptr(in6_addr), t ); println!("Buf: {}", fmt_ptr(buf)); println!("Msg: {}", fmt_ptr(msg)); 0 } #[no_mangle] pub extern "C" fn netconsd_output_exit() { println!("Rust example module bye bye"); } netconsd-0.4.1/rust/module/000077500000000000000000000000001457170767400156355ustar00rootroot00000000000000netconsd-0.4.1/rust/module/.gitignore000066400000000000000000000000231457170767400176200ustar00rootroot00000000000000target/ Cargo.lock netconsd-0.4.1/rust/module/Cargo.toml000066400000000000000000000003101457170767400175570ustar00rootroot00000000000000[package] name = "netconsd_module" version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] libc = "0.2.40" netconsd-0.4.1/rust/module/src/000077500000000000000000000000001457170767400164245ustar00rootroot00000000000000netconsd-0.4.1/rust/module/src/lib.rs000066400000000000000000000065101457170767400175420ustar00rootroot00000000000000/* * These structs have been generated with bindgen (except for the bitfields getters), * and are passed to netconsd_output_handler function defined in a netconsd module. * * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/

use std::ffi::CStr;
use std::fmt;
use std::os::raw::c_char;
pub use std::os::raw::c_int;
use std::os::raw::c_void;

pub use libc::in6_addr;
use libc::iovec;
pub use libc::sockaddr_in6;

/// Raw receive buffer handed to the output handler.
#[repr(C)]
pub struct MsgBuf {
    pub next: *const MsgBuf,
    pub iovec: iovec,
    pub src: sockaddr_in6,
    pub hole: [u8; 4],
    pub rcv_time: u64,
    pub rcv_flags: c_int,
    pub rcv_bytes: c_int,
}

impl fmt::Debug for MsgBuf {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("MsgBuf")
            .field("src", &std::net::Ipv6Addr::from(self.src.sin6_addr.s6_addr))
            .field("rcv_time", &self.rcv_time)
            .field("rcv_flags", &self.rcv_flags)
            .field("rcv_bytes", &self.rcv_bytes)
            .finish()
    }
}

#[derive(Debug)]
#[repr(C)]
pub struct NcrxList {
    pub next: *mut NcrxList,
    pub prev: *mut NcrxList,
}

/// Parsed message handed to the output handler.
#[derive(Debug)]
#[repr(C)]
pub struct NcrxMsg {
    pub seq: u64,
    pub ts_usec: u64,
    pub text: *const c_char,
    pub dict: *const c_char,
    pub text_len: c_int,
    pub dict_len: c_int,
    pub facility: u8,
    pub level: u8,
    pub flags: u8,
    pub node: NcrxList,
    pub rx_at_mono: u64,
    pub rx_at_real: u64,
    pub ncfrag_off: c_int,
    pub ncfrag_len: c_int,
    pub ncfrag_left: c_int,
    pub _bitfield_align_2: [u8; 0],
    pub _bitfield_2: [u8; 1usize],
}

impl NcrxMsg {
    /// Bit 0 of `flags`.
    pub fn get_cont_start(&self) -> bool {
        self.flags & 0b1 > 0
    }

    /// Bit 1 of `flags`.
    pub fn get_cont(&self) -> bool {
        self.flags & 0b10 > 0
    }

    /// Bit 2 of `flags`.
    pub fn get_oos(&self) -> bool {
        self.flags & 0b100 > 0
    }

    /// Bit 3 of `flags`.
    pub fn get_seq_reset(&self) -> bool {
        self.flags & 0b1000 > 0
    }
}

/// Formats the 16 address bytes as a hex list, or "NULL" for a null pointer.
pub fn format_in6_addr_ptr(ptr: *const in6_addr) -> String {
    match unsafe { ptr.as_ref() } {
        None => "NULL".to_owned(),
        Some(x) => format!("{:x?}", x.s6_addr),
    }
}

impl fmt::Display for NcrxMsg {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let cont_start = if self.get_cont_start() {
            "[CONT_START]"
        } else {
            ""
        };
        let cont = if self.get_cont() { "[CONT]" } else { "" };
        let oos = if self.get_oos() { "[OOS]" } else { "" };
        let seq_reset = if self.get_seq_reset() {
            "[SEQ_RESET]"
        } else {
            ""
        };
        let text = str_from_c_void(self.text as *const c_void);

        write!(
            formatter,
            "S{} T{} F{}/L{}{}{}{}{}: {}",
            self.seq,
            self.ts_usec,
            self.facility,
            self.level,
            cont_start,
            cont,
            oos,
            seq_reset,
            text
        )
    }
}

/// Reads the C string at `ptr` for display purposes.
///
/// A null pointer yields "NULL" (matching `format_in6_addr_ptr`), and bytes
/// that are not valid UTF-8 are replaced instead of panicking inside a
/// callback invoked from C. The result borrows from the buffer behind `ptr`,
/// so it must be consumed before that buffer is released.
///
/// NOTE(review): like the original, this assumes the buffer is
/// NUL-terminated - confirm for `MsgBuf::iovec`.
fn str_from_c_void<'a>(ptr: *const c_void) -> std::borrow::Cow<'a, str> {
    if ptr.is_null() {
        return std::borrow::Cow::Borrowed("NULL");
    }
    unsafe { CStr::from_ptr(ptr as *const c_char) }.to_string_lossy()
}

impl fmt::Display for MsgBuf {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(formatter, "{}", str_from_c_void(self.iovec.iov_base))
    }
}
netconsd-0.4.1/rust/self_test_module/000077500000000000000000000000001457170767400177055ustar00rootroot00000000000000
netconsd-0.4.1/rust/self_test_module/.gitignore000066400000000000000000000000241457170767400216710ustar00rootroot00000000000000
/target
/Cargo.lock
netconsd-0.4.1/rust/self_test_module/Cargo.toml000066400000000000000000000004451457170767400216400ustar00rootroot00000000000000
[package]
name = "self_test_module"
version = "0.1.0"
edition = "2021"

[dependencies]
once_cell = "1.15.0"
libc = "0.2.40"
netconsd_module = {path = "../module"}
netconsblaster = {path = "../blaster"}
nix = {version = "0.25.0", features = ["signal", "process"]}

[lib]
crate-type = ["dylib"]
netconsd-0.4.1/rust/self_test_module/src/000077500000000000000000000000001457170767400204745ustar00rootroot00000000000000
netconsd-0.4.1/rust/self_test_module/src/lib.rs000066400000000000000000000141251457170767400216130ustar00rootroot00000000000000
/*
 * A netconsd module that sends messages to loopback and verifies that it
 * receives all the messages.
 * E.g. ./netconsd -w 2 -l 2 -u 6666 self_test_module.so
 *
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
*/

use std::ffi::CStr;
use std::os::raw::c_char;
use std::os::raw::c_int;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Mutex;
use std::sync::MutexGuard;
use std::thread;
use std::time::Duration;

use libblaster::blast_worker;
use libblaster::WorkerConfig;
use libc::in6_addr;
use netconsd_module::MsgBuf;
use netconsd_module::NcrxMsg;
use nix::sys::signal;
use nix::unistd::Pid;
use once_cell::sync::Lazy;

const MESSAGES_TO_SEND_PER_THREAD: usize = 100;
const SENDER_THREADS_COUNT: usize = 10;
const TOT_MESSAGES_TO_SEND: usize = SENDER_THREADS_COUNT * MESSAGES_TO_SEND_PER_THREAD;
static TERMINATED: AtomicBool = AtomicBool::new(false);
static INVOCATIONS_COUNT: AtomicUsize = AtomicUsize::new(0);
const TIMEOUT_DURATION: Duration = Duration::from_secs(5);
const BLASTER_SLEEP_DURATION: Duration = Duration::from_nanos(10);
const SENDR_ADDR_RND_BYTES: usize = 0;

/// One message as seen by the output handler.
#[derive(Clone, Debug)]
struct ReceivedMessage {
    src: [u8; 16],
    msg: String,
}

static RECEIVED_MESSAGES: Lazy<Mutex<Vec<ReceivedMessage>>> =
    Lazy::new(|| Mutex::new(Vec::new()));

fn result_str(result: bool) -> &'static str {
    if result {
        "-> OK"
    } else {
        "-> FAILURE"
    }
}

fn get_received_messages() -> MutexGuard<'static, Vec<ReceivedMessage>> {
    RECEIVED_MESSAGES
        .lock()
        .expect("Could not lock RECEIVED_MESSAGES")
}

/// Checks that the handler was invoked exactly once per message sent.
fn verify_invocations_count() -> bool {
    let invocations_count = INVOCATIONS_COUNT.load(Ordering::SeqCst);
    let passed = invocations_count == TOT_MESSAGES_TO_SEND;
    print!("{} invocations ", invocations_count);
    if !passed {
        print!("but should have been {}", TOT_MESSAGES_TO_SEND);
    }
    println!("{}", result_str(passed));
    passed
}

/// Checks that every stored message text starts with "hello packet".
fn verify_received_messages_texts() -> bool {
    let mut passed = true;
    for message in get_received_messages().iter() {
        if !message.msg.starts_with("hello packet") {
            println!("unexpected message: {}", message.msg);
            passed = false;
        }
    }
    println!("messages texts {}", result_str(passed));
    passed
}

/// Checks that exactly `TOT_MESSAGES_TO_SEND` messages were stored.
fn verify_received_messages_count() -> bool {
    let messages_count = get_received_messages().len();
    print!("received {} ncrx messages ", messages_count);
    let passed = messages_count == TOT_MESSAGES_TO_SEND;
    if !passed {
        print!("but should have been {} ", TOT_MESSAGES_TO_SEND);
    }
    println!("{}", result_str(passed));
    passed
}

/// Extracts the sender thread id from a source address.
///
/// Returns `None` unless the first 15 bytes are zero and the last byte names
/// one of the `SENDER_THREADS_COUNT` sender threads. Rejecting larger ids here
/// keeps the per-thread counters from being indexed out of bounds by a stray
/// packet.
fn check_src_address(addr: &[u8; 16]) -> Option<usize> {
    if addr[0..15].iter().any(|x| *x != 0) {
        return None;
    }
    let id = usize::from(addr[15]);
    if id < SENDER_THREADS_COUNT {
        Some(id)
    } else {
        None
    }
}

/// Checks that every sender thread contributed exactly
/// `MESSAGES_TO_SEND_PER_THREAD` messages and that no other source was seen.
fn verify_received_messages_addresses() -> bool {
    let mut passed = true;
    let mut seen_ids = vec![0; SENDER_THREADS_COUNT];

    for msg in get_received_messages().iter() {
        match check_src_address(&msg.src) {
            Some(id) => seen_ids[id] += 1,
            None => {
                println!("Bad src address: {:x?}", msg.src);
                passed = false;
            }
        }
    }

    for (i, seen_id) in seen_ids.into_iter().enumerate() {
        if seen_id != MESSAGES_TO_SEND_PER_THREAD {
            println!(
                "got {} messages from thread {}, should have been {}",
                seen_id, i, MESSAGES_TO_SEND_PER_THREAD
            );
            passed = false;
        }
    }

    println!("src addresses {}", result_str(passed));
    passed
}

/// Sends SIGTERM to this process, at most once.
fn end_self_test() {
    if !TERMINATED.fetch_or(true, Ordering::SeqCst) {
        signal::kill(Pid::this(), signal::SIGTERM).expect("Could not send SIGTERM");
    }
}

/// Stores the source address and text of one received message.
/// Null pointers are ignored; non-UTF-8 text is stored as an empty string.
fn process_received_message(addr_ptr: *const in6_addr, msg_ptr: *const NcrxMsg) {
    let addr = match unsafe { addr_ptr.as_ref() } {
        Some(addr) => addr,
        None => return,
    };
    let msg = match unsafe { msg_ptr.as_ref() } {
        Some(msg) => msg,
        None => return,
    };

    let text = if msg.text.is_null() {
        println!("NcrxMsg has no text");
        ""
    } else {
        match unsafe { CStr::from_ptr(msg.text as *const c_char) }.to_str() {
            Ok(x) => x,
            Err(_) => {
                println!("Could not convert NcrxMsg msg to string");
                ""
            }
        }
    };

    let received_message = ReceivedMessage {
        src: addr.s6_addr,
        msg: text.to_owned(),
    };
    get_received_messages().push(received_message);
}

/// Starts the sender threads; each waits 100ms before blasting.
fn spawn_blast_workers() {
    for i in 0..SENDER_THREADS_COUNT {
        thread::spawn(move || {
            let config = WorkerConfig {
                id: i as u8,
                packets_count: MESSAGES_TO_SEND_PER_THREAD as u64,
                dst_port: 6666,
                sleep_duration: Some(BLASTER_SLEEP_DURATION),
                extended_msg: true,
                sender_addr_rnd_bytes: SENDR_ADDR_RND_BYTES,
            };
            thread::sleep(Duration::from_millis(100));
            blast_worker(config);
        });
    }
}

/// Ends the test after `TIMEOUT_DURATION` even if messages are missing.
fn start_test_timeout() {
    thread::spawn(|| {
        thread::sleep(TIMEOUT_DURATION);
        end_self_test();
    });
}

#[no_mangle]
pub extern "C" fn netconsd_output_init(nr_workers: c_int) -> c_int {
    println!("Selftest module init! {} workers", nr_workers);
    spawn_blast_workers();
    start_test_timeout();
    0
}

#[no_mangle]
pub extern "C" fn netconsd_output_handler(
    _t: c_int,
    in6_addr: *const in6_addr,
    _buf: *const MsgBuf,
    msg: *const NcrxMsg,
) -> i32 {
    process_received_message(in6_addr, msg);
    // fetch_add returns the previous value, hence the "- 1".
    if INVOCATIONS_COUNT.fetch_add(1, Ordering::SeqCst) >= TOT_MESSAGES_TO_SEND - 1 {
        end_self_test();
    }
    0
}

#[no_mangle]
pub extern "C" fn netconsd_output_exit() {
    println!("\nSELF TEST RESULT");
    let mut passed = verify_invocations_count();
    passed &= verify_received_messages_count();
    passed &= verify_received_messages_texts();
    passed &= verify_received_messages_addresses();
    println!();

    if !passed {
        std::process::exit(1);
    }
}
netconsd-0.4.1/threads.c000066400000000000000000000132601457170767400151530ustar00rootroot00000000000000
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
*/ #include #include #include #include #include #include #include #include "include/common.h" #include "include/msgbuf-struct.h" #include "include/listener.h" #include "include/worker.h" #include "include/threads.h" struct tctl { int nr_listeners; int nr_workers; struct ncrx_listener *listeners; struct ncrx_worker *workers; }; static void wake_thread(struct ncrx_listener *listener, int worker) { struct ncrx_worker *tgt = &listener->workers[worker]; assert_pthread_mutex_locked(&tgt->queuelock); debug("Waking thread %d\n", worker); pthread_cond_signal(&tgt->cond); } static void push_prequeue_to_worker(struct ncrx_listener *listener, int worker) { struct ncrx_worker *tgt = &listener->workers[worker]; struct ncrx_prequeue *prequeue = &listener->prequeues[worker]; assert_pthread_mutex_locked(&tgt->queuelock); if (tgt->queue_head) tgt->queue_tail->next = prequeue->queue_head; else tgt->queue_head = prequeue->queue_head; tgt->queue_tail = prequeue->queue_tail; prequeue->queue_head = NULL; debug("Listener %d pushed %d pkts to worker %d (backlog: %d)\n", listener->thread_nr, prequeue->count, worker->thread_nr, tgt->nr_queued); tgt->nr_queued += prequeue->count; prequeue->count = 0; } static void enqueue_and_wake_worker(struct ncrx_listener *listener, int worker) { struct ncrx_worker *tgt = &listener->workers[worker]; pthread_mutex_lock(&tgt->queuelock); push_prequeue_to_worker(listener, worker); wake_thread(listener, worker); pthread_mutex_unlock(&tgt->queuelock); } static int prequeue_is_empty(struct ncrx_listener *listener, int worker) { struct ncrx_prequeue *prequeue = &listener->prequeues[worker]; return prequeue->queue_head == NULL; } void enqueue_and_wake_all(struct ncrx_listener *listener) { int i; for (i = 0; i < listener->nr_workers; i++) if (!prequeue_is_empty(listener, i)) enqueue_and_wake_worker(listener, i); } static void stop_and_wait_for_workers(struct tctl *ctl) { int i; uint64_t total_processed = 0, total_hosts = 0; for (i = 0; i < ctl->nr_workers; i++) { 
pthread_mutex_lock(&ctl->workers[i].queuelock); ctl->workers[i].stop = 1; pthread_cond_signal(&ctl->workers[i].cond); pthread_mutex_unlock(&ctl->workers[i].queuelock); pthread_join(ctl->workers[i].id, NULL); pthread_mutex_destroy(&ctl->workers[i].queuelock); pthread_cond_destroy(&ctl->workers[i].cond); pthread_condattr_destroy(&ctl->workers[i].condattr); total_processed += ctl->workers[i].processed; total_hosts += ctl->workers[i].hosts_seen; log("Exiting worker %d got %" PRIu64 " msgs from %" PRIu64 " hosts\n", i, ctl->workers[i].processed, ctl->workers[i].hosts_seen); } log("Total messages processed by workers: %" PRIu64 " from %" PRIu64 " hosts\n", total_processed, total_hosts); free(ctl->workers); } static void stop_and_wait_for_listeners(struct tctl *ctl) { int i; uint64_t total_processed = 0; for (i = 0; i < ctl->nr_listeners; i++) { ctl->listeners[i].stop = 1; pthread_kill(ctl->listeners[i].id, SIGUSR1); pthread_join(ctl->listeners[i].id, NULL); free(ctl->listeners[i].prequeues); total_processed += ctl->listeners[i].processed; log("Exiting listener %d queued %" PRIu64 " messages\n", i, ctl->listeners[i].processed); } log("Total messages processed by listeners: %" PRIu64 "\n", total_processed); free(ctl->listeners); } static void create_worker_threads(struct tctl *ctl, struct netconsd_params *p) { struct ncrx_worker *cur, *workers; int i, r; workers = calloc(p->nr_workers, sizeof(*workers)); if (!workers) fatal("Couldn't allocate thread structures\n"); for (i = 0; i < p->nr_workers; i++) { cur = &workers[i]; pthread_mutex_init(&cur->queuelock, NULL); pthread_condattr_init(&cur->condattr); pthread_condattr_setclock(&cur->condattr, CLOCK_MONOTONIC); pthread_cond_init(&cur->cond, &cur->condattr); cur->queue_head = NULL; cur->thread_nr = i; cur->gc_int_ms = p->gc_int_ms; cur->gc_age_ms = p->gc_age_ms; cur->lastgc = p->gc_int_ms ? 
now_mono_ms() / p->gc_int_ms : 0; r = pthread_create(&cur->id, NULL, ncrx_worker_thread, cur); if (r) fatal("%d/%d failed: -%d\n", i, p->nr_workers, r); } ctl->nr_workers = p->nr_workers; ctl->workers = workers; } static void create_listener_threads(struct tctl *ctl, struct netconsd_params *p) { struct ncrx_prequeue *prequeues; struct ncrx_listener *cur, *listeners; int i, r; listeners = calloc(p->nr_listeners, sizeof(*listeners)); if (!listeners) fatal("Couldn't allocate listeners: %m\n"); for (i = 0; i < p->nr_listeners; i++) { cur = &listeners[i]; prequeues = calloc(ctl->nr_workers, sizeof(*prequeues)); if (!prequeues) fatal("ENOMEM %d/%d\n", i, p->nr_listeners); cur->thread_nr = i; cur->prequeues = prequeues; cur->workers = ctl->workers; cur->nr_workers = ctl->nr_workers; cur->batch = p->mmsg_batch; cur->address = &p->listen_addr; r = pthread_create(&cur->id, NULL, udp_listener_thread, cur); if (r) fatal("%d/%d failed: -%d\n", i, p->nr_listeners, r); } ctl->nr_listeners = p->nr_listeners; ctl->listeners = listeners; } void destroy_threads(struct tctl *ctl) { stop_and_wait_for_listeners(ctl); stop_and_wait_for_workers(ctl); free(ctl); } struct tctl *create_threads(struct netconsd_params *p) { struct tctl *ret; ret = calloc(1, sizeof(*ret)); if (!ret) fatal("Couldn't allocate thread structures\n"); ret->nr_workers = p->nr_workers; create_worker_threads(ret, p); create_listener_threads(ret, p); return ret; } netconsd-0.4.1/util/000077500000000000000000000000001457170767400143305ustar00rootroot00000000000000netconsd-0.4.1/util/Makefile000066400000000000000000000003531457170767400157710ustar00rootroot00000000000000CFLAGS ?= -O2 -fPIC CFLAGS += -D_GNU_SOURCE CPPFLAGS ?= LDFLAGS ?= LIBS = -lpthread all: netconsblaster netconsblaster: $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) netconsblaster.c $(LIBS) -o netconsblaster clean: rm -f netconsblaster netconsd-0.4.1/util/netconsblaster.c000066400000000000000000000250671457170767400175340ustar00rootroot00000000000000/* * 
netconsblaster: A test excerciser for netconsd and libncrx * * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define fatal(...) \ do { \ printf(__VA_ARGS__); \ exit(EXIT_FAILURE); \ } while (0) static uint64_t rand64(unsigned int *seed) { uint64_t ret; ret = (uint64_t) rand_r(seed) << 32 | rand_r(seed); return ret; } static uint64_t now_epoch_ms(void) { struct timespec t; clock_gettime(CLOCK_MONOTONIC, &t); return t.tv_sec * 1000 + t.tv_nsec / 1000000L; } static int ones_complement_sum(uint16_t *data, int len, int sum) { unsigned int tmp; int i; for (i = 0; i < len / 2; i++) { tmp = ntohs(data[i]); /* * Kill -0 */ if (tmp == 65535) tmp = 0; sum += tmp; if (sum >= 65536) { sum &= 65535; sum++; } } if (len & 1) fatal("Use test data with even lengths please\n"); return sum; } /* * From RFC768: "Checksum is the 16-bit one's complement of the one's * complement sum of a pseudo header of information from the IP header, the UDP * header, and the data, padded with zero octets at the end (if necessary) to * make a multiple of two octets." * * See RFC2460 section 8.1 for definition of pseudoheader for IPv6. * * In case you're wondering why I bothered with this: "Unlike IPv4, when UDP * packets are originated by an IPv6 node, the UDP checksum is NOT optional. * IPv6 receivers MUST discard packets containing a zero checksum." 
* * @addrs: Pointer to the begnning of the src/dst addresses in the ipv6hdr * @udppkt: Pointer to the udphdr * @len: Length of the udphdr and its payload */ static int udp_csum(void *addrptr, void *udppkt, int len) { unsigned int sum = 0; uint16_t *addrs = addrptr; uint16_t pseudohdr[4] = {0, htons(len), 0, htons(IPPROTO_UDP)}; sum = ones_complement_sum(addrs, 32, 0); sum = ones_complement_sum(pseudohdr, 8, sum); sum = ones_complement_sum(udppkt, len, sum); sum = ~sum; /* * From RFC768: "If the computed checksum is zero, it is transmitted as * all ones. An all zero transmitted checksum value means that the * transmitter generated no checksum" */ if (sum == 0) sum = 65535; return sum; } /* * Length of payload to send with every netconsole packet */ #define NETCONSLEN 64 /* * Layout of a raw netconsole packet */ struct netcons_packet { struct ip6_hdr l3; struct udphdr l4; char payload[]; } __attribute__((packed)); /* * Metadata for extended netconsole packets */ struct netcons_metadata { uint64_t seq; uint64_t ts; uint8_t cont; uint8_t lvl; }; static void bump_metadata(struct netcons_metadata *md) { md->seq++; md->ts += 1337; } /* * Filler text for packets. 
*/ static const char *filler = "012345678901234567890123456789012345678901234567890123456789012"; /* * Numeric to symbol for the CONT flag */ static const char *contflag(int cont) { switch (cont) { case 0: /* * No CONT flag present */ return "-"; case 1: /* * CONT_START */ return "c"; case 2: /* * CONT */ return "+"; default: fatal("CONT value %d invalid?\n", cont); }; } static void make_packet(struct netcons_packet *pkt, const struct in6_addr *src, const struct in6_addr *dst, const int16_t *dst_port, const struct netcons_metadata *md) { const int len = NETCONSLEN; unsigned int nr; memset(pkt, 0, sizeof(pkt->l3) + sizeof(pkt->l4)); memcpy(&pkt->l3.ip6_src, src, sizeof(*src)); memcpy(&pkt->l3.ip6_dst, dst, sizeof(*dst)); pkt->l3.ip6_vfc |= (6 << 4); pkt->l3.ip6_nxt = IPPROTO_UDP; pkt->l3.ip6_plen = htons(sizeof(pkt->l4) + len); pkt->l3.ip6_hlim = 64; nr = snprintf(pkt->payload, len - 1, "%d,%lu,%lu,%s;", md->lvl, md->seq, md->ts, contflag(md->cont)); if (nr < len) snprintf(pkt->payload + nr, len - nr, "%s", filler); pkt->payload[len - 1] = '\n'; pkt->l4.source = htons(6666); pkt->l4.dest = htons(*dst_port); pkt->l4.len = htons(sizeof(pkt->l4) + len); pkt->l4.check = htons(udp_csum(&pkt->l3.ip6_src, &pkt->l4, sizeof(pkt->l4) + len)); } static int write_packet(int sockfd, struct netcons_packet *pkt) { const int len = sizeof(pkt->l3) + sizeof(pkt->l4) + NETCONSLEN; struct sockaddr_in6 bogus = { .sin6_family = AF_INET6, }; memcpy(&bogus.sin6_addr, &pkt->l3.ip6_dst, sizeof(pkt->l3.ip6_dst)); return sendto(sockfd, pkt, len, 0, &bogus, sizeof(bogus)) != len; } static int get_raw_socket(void) { int fd; fd = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW); if (fd == -1) fatal("Couldn't get raw socket: %m\n"); return fd; } static struct netcons_packet *alloc_packet(void) { struct netcons_packet *ret; ret = malloc(sizeof(struct netcons_packet) + NETCONSLEN); if (!ret) fatal("ENOMEM allocating packet\n"); return ret; } static struct netcons_metadata *alloc_metadata_array(int bits) { 
struct netcons_metadata *ret; ret = calloc(1 << bits, sizeof(*ret)); if (!ret) fatal("ENOMEM allocating metadata\n"); return ret; } static uint64_t mask_long(uint64_t val, int bits) { uint64_t mask = (1UL << bits) - 1; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ mask = __builtin_bswap64(mask); #endif return val & mask; } static uint64_t permute_addr(struct in6_addr *addr, int bits, unsigned int *seed) { uint64_t *punned; punned = (uint64_t *)&addr->s6_addr[16 - sizeof(uint64_t)]; *punned ^= mask_long(rand64(seed), bits); return mask_long(*punned, bits); } struct blaster_state { pthread_t id; int nr; struct in6_addr dst; struct in6_addr src; int16_t dst_port; unsigned int seed; long blastcount; int *stopptr; int bits; }; static void *blaster_thread(void *arg) { struct blaster_state *_blaster_state = arg; struct netcons_metadata *mdarr; struct netcons_packet *pkt; struct in6_addr src; long idx, count = 0; int fd; fd = get_raw_socket(); pkt = alloc_packet(); mdarr = alloc_metadata_array(_blaster_state->bits); memcpy(&src, &_blaster_state->src, sizeof(src)); _blaster_state->seed = syscall(SYS_gettid); while (!*_blaster_state->stopptr) { idx = permute_addr(&src, _blaster_state->bits, &_blaster_state->seed); make_packet(pkt, &src, &_blaster_state->dst, &_blaster_state->dst_port, &mdarr[idx]); bump_metadata(&mdarr[idx]); if (!write_packet(fd, pkt)) count++; if (_blaster_state->blastcount && count == _blaster_state->blastcount) break; } return (void*)count; } static struct params { int srcaddr_order; int thread_order; struct in6_addr src; struct in6_addr dst; int16_t dst_port; long blastcount; int stop_blasting; } params; static void parse_arguments(int argc, char **argv, struct params *p) { int i; const char *optstr = "o:s:d:t:n:p:"; const struct option optlong[] = { { .name = "help", .has_arg = no_argument, .val = 'h', }, { .name = NULL, }, }; /* * Defaults */ p->srcaddr_order = 16; p->thread_order = 0; p->dst_port = 1514; memcpy(&p->src, &in6addr_loopback, 
sizeof(in6addr_loopback)); memcpy(&p->dst, &in6addr_loopback, sizeof(in6addr_loopback)); p->blastcount = 0; p->stop_blasting = 0; while ((i = getopt_long(argc, argv, optstr, optlong, NULL)) != -1) { switch (i) { case 'o': /* * Controls the number of bits to randomly flip in the * actual IPv6 address of this machine. So the program * will effectively simulate 2^N clients. */ p->srcaddr_order = atoi(optarg); if (p->srcaddr_order > 64 - 8) fatal("Source address order too large\n"); break; case 't': /* * Split the work among 2^N worker threads. */ p->thread_order = atoi(optarg); if (p->thread_order > 8) fatal("Largest supported thread order is 8\n"); break; case 's': /* * Source address to permute the low N bits of. */ if (inet_pton(AF_INET6, optarg, &p->src) != 1) fatal("Bad src '%s': %m\n", optarg); break; case 'd': /* * Destination address for all generated packets. */ if (inet_pton(AF_INET6, optarg, &p->dst) != 1) fatal("Bad dst '%s': %m\n", optarg); break; case 'n': /* * Write N packets from each worker thread and exit. */ p->blastcount = atol(optarg); break; case 'p': /* * Set the destination UDP port for outgoing packets. 
*/ p->dst_port = atoi(optarg); break; case 'h': puts("Usage: netconsblaster [-o srcaddr_bits] [-t thread_order]\n" " [-s srcaddr] [-d dstaddr]\n" " [-n pktcount] [-p dst_port]\n"); puts(" srcaddr_bits: Randomize low N bits of srcaddr"); puts(" thread_order: Split work among 2^N threads"); puts(" pktcount: Stop after N pkts per thread\n"); puts(" dst_port: The UDP destination port\n"); exit(0); default: fatal("Invalid command line parameters\n"); } } } static void stop_signal(__attribute__((__unused__))int signum) { params.stop_blasting = 1; } int main(int argc, char **argv) { int i, nr_threads, srcaddr_per_thread; uint64_t tmp, count, start, finish; struct blaster_state *threadstates, *threadstate; struct sigaction stopper = { .sa_handler = stop_signal, }; parse_arguments(argc, argv, ¶ms); nr_threads = 1 << params.thread_order; srcaddr_per_thread = params.srcaddr_order - params.thread_order; if (srcaddr_per_thread <= 0) fatal("More thread bits than srcaddr bits\n"); threadstates = calloc(nr_threads, sizeof(*threadstates)); if (!threadstates) fatal("ENOMEM allocating state for threads\n"); sigaction(SIGINT, &stopper, NULL); for (i = 0; i < nr_threads; i++) { threadstate = &threadstates[i]; memcpy(&threadstate->src, ¶ms.src, sizeof(threadstate->src)); memcpy(&threadstate->dst, ¶ms.dst, sizeof(threadstate->dst)); memcpy(&threadstate->dst_port, ¶ms.dst_port, sizeof(threadstate->dst_port)); threadstate->blastcount = params.blastcount; threadstate->stopptr = ¶ms.stop_blasting; threadstate->bits = srcaddr_per_thread; threadstate->src.s6_addr[15] = (unsigned char)i; threadstate->nr = i; if (pthread_create(&threadstate->id, NULL, blaster_thread, threadstate)) fatal("Thread %d/%d failed: %m\n", i, nr_threads); } count = 0; start = now_epoch_ms(); for (i = 0; i < nr_threads; i++) { pthread_join(threadstates[i].id, (void**)&tmp); count += tmp; } finish = now_epoch_ms(); printf("Wrote %lu packets (%lu pkts/sec)\n", count, count / (finish - start) * 1000UL); return 0; } 
netconsd-0.4.1/worker.c000066400000000000000000000316631457170767400150410ustar00rootroot00000000000000/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include #include #include #include "include/common.h" #include "include/msgbuf-struct.h" #include "include/output.h" #include "include/worker.h" static const struct ncrx_param ncrx_param = { .nr_slots = 512, .retx_intv = NETCONS_RTO, .msg_timeout = NETCONS_RTO, .oos_timeout = NETCONS_RTO, }; /* * Keep it simple: just use a boring probing hashtable that resizes. */ struct timerlist { struct timerlist *prev; struct timerlist *next; uint64_t when; }; struct bucket { struct in6_addr src; struct ncrx *ncrx; uint64_t last_seen; struct timerlist timernode; }; struct hashtable { unsigned long order; unsigned long load; struct bucket table[]; }; static unsigned long hash_srcaddr(struct in6_addr *addr) { uint32_t *addrptr = (uint32_t *)addr; return jhash2(addrptr, sizeof(*addr) / sizeof(*addrptr), WORKER_SEED); } static unsigned long order_mask(int order) { return (1UL << order) - 1; } static unsigned long htable_mask(unsigned long hash, int order) { return hash & order_mask(order); } static unsigned long htable_hash(struct hashtable *h, struct in6_addr *s) { return htable_mask(hash_srcaddr(s), h->order); } static int srcaddr_compar(struct in6_addr *a, struct in6_addr *b) { return memcmp(a, b, sizeof(*a)); } static struct bucket *hlookup(struct hashtable *h, struct in6_addr *src) { unsigned long origidx, idx; origidx = htable_hash(h, src); idx = origidx; while (h->table[idx].ncrx && srcaddr_compar(&h->table[idx].src, src)) { idx = htable_mask(idx + 1, h->order); fatal_on(idx == origidx, "Worker hashtable is full\n"); } return &h->table[idx]; } /* * Use -1 to represent "no wake needed" */ static void reset_waketime(struct ncrx_worker 
*cur) { cur->wake.tv_sec = -1; } static uint64_t ms_from_timespec(struct timespec *t) { return t->tv_sec * 1000LL + t->tv_nsec / 1000000L; } /* * Update the waketime if @when is before the current waketime. * * We assume that CLOCK_MONOTONIC cannot wrap: strictly speaking this is wrong, * since POSIX allows the MONOTONIC clock to start from any arbitrary value; but * since it starts from zero on Linux I'm not going to jump through the hoops. */ static void maybe_update_wake(struct ncrx_worker *cur, uint64_t when) { uint64_t curwake = ms_from_timespec(&cur->wake); if ((int64_t)curwake >= 0LL && curwake <= when) return; cur->wake.tv_sec = when / 1000LL; cur->wake.tv_nsec = (when % 1000LL) * 1000000L; } static const struct timespec end_of_time = { .tv_sec = (time_t)((1ULL << ((sizeof(time_t) << 3) - 1)) - 1), }; static const struct timespec *next_waketime(struct ncrx_worker *cur) { if (cur->wake.tv_sec == -1) return &end_of_time; return &cur->wake; } static struct bucket *bucket_from_timernode(struct timerlist *node) { return container_of(node, struct bucket, timernode); } static void timerlist_init(struct timerlist *node) { node->next = node; node->prev = node; node->when = 0; } static int timerlist_empty(struct timerlist *node) { return node->next == node; } static void timerlist_append(struct timerlist *node, struct timerlist *list) { struct timerlist *prev = list->prev; fatal_on(!timerlist_empty(node), "Queueing node already on list\n"); node->next = list; node->prev = prev; prev->next = node; list->prev = node; } static void timerlist_del(struct timerlist *node) { struct timerlist *prev = node->prev; struct timerlist *next = node->next; prev->next = next; next->prev = prev; timerlist_init(node); } /* * Return the callback time of the newest item on the list */ static uint64_t timerlist_peek(struct timerlist *list) { if (timerlist_empty(list)) return 0; return list->prev->when; } #define timerlist_for_each(this, n, thead) \ for (this = (thead)->next, n = 
this->next; this != (thead); \ this = n, n = this->next) static struct timerlist *create_timerlists(void) { struct timerlist *ret; int i; ret = calloc(NETCONS_RTO, sizeof(*ret)); if (!ret) fatal("Unable to allocate timerlist\n"); for (i = 0; i < NETCONS_RTO; i++) timerlist_init(&ret[i]); return ret; } static void destroy_timerlists(struct timerlist *timerlist) { free(timerlist); } static struct hashtable *create_hashtable(int order, struct hashtable *old) { struct hashtable *new; struct bucket *bkt; unsigned long i; new = zalloc(sizeof(*new) + sizeof(struct bucket) * (1UL << order)); if (!new) fatal("Unable to allocate hashtable\n"); new->order = order; if (!old) return new; for (i = 0; i < (1UL << old->order); i++) { if (old->table[i].ncrx) { bkt = hlookup(new, &old->table[i].src); memcpy(bkt, &old->table[i], sizeof(*bkt)); /* * If the timernode wasn't on a list, initialize it as * empty for the new bucket. If it was, update its * neighbors to point to the new bucket. */ if (bkt->timernode.next == &old->table[i].timernode) { timerlist_init(&bkt->timernode); } else { bkt->timernode.next->prev = &bkt->timernode; bkt->timernode.prev->next = &bkt->timernode; } } } new->load = old->load; free(old); return new; } static void destroy_hashtable(struct hashtable *ht) { unsigned long i; for (i = 0; i < (1UL << ht->order); i++) if (ht->table[i].ncrx) ncrx_destroy(ht->table[i].ncrx); free(ht); } static void maybe_resize_hashtable(struct ncrx_worker *cur, unsigned long new) { unsigned long neworder; if ((cur->ht->load + new) >> (cur->ht->order - 2) < 3) return; /* * The hashtable is more than 75% full. Resize it such that it can take * @new additional client hosts and be less than 50% full. 
*/ neworder = LONG_BIT - __builtin_clzl(cur->ht->load + new) + 1; cur->ht = create_hashtable(neworder, cur->ht); } static void hdelete(struct hashtable *h, struct bucket *victim) { struct bucket *old, *new; unsigned long origidx, idx; fatal_on(!victim->ncrx, "Attempt to delete free bucket\n"); if (!timerlist_empty(&victim->timernode)) timerlist_del(&victim->timernode); h->load--; ncrx_destroy(victim->ncrx); memset(victim, 0, sizeof(*victim)); /* * There's potential to be clever here, but for now just be pedantic and * rebucket any potentially probed entries. */ origidx = victim - h->table; idx = origidx; while (h->table[idx].ncrx) { old = &h->table[idx]; new = hlookup(h, &old->src); if (new != old) { memcpy(new, old, sizeof(*new)); memset(old, 0, sizeof(*old)); /* * If the timernode wasn't on a list, initialize it as * empty for the new bucket. If it was, update its * neighbors to point to the new bucket. */ if (new->timernode.next == &old->timernode) { timerlist_init(&new->timernode); } else { new->timernode.next->prev = &new->timernode; new->timernode.prev->next = &new->timernode; } } idx = htable_mask(idx + 1, h->order); fatal_on(idx == origidx, "Infinite loop in hdelete()\n"); } } /* * Simple garbage collection. This is meant to be rare (on the order of once per * hour), so maintaining an LRU list isn't worth the overhead: just blow through * the whole table. Worst case it's ~50MB. 
*/ static void try_to_garbage_collect(struct ncrx_worker *cur) { unsigned long i, count = 0; uint64_t now, end; struct bucket *bkt; now = now_mono_ms(); for (i = 0; i < (1UL << cur->ht->order); i++) { bkt = &cur->ht->table[i]; if (bkt->ncrx && now - bkt->last_seen > cur->gc_age_ms) { hdelete(cur->ht, bkt); count++; } } end = now_mono_ms(); log("Worker %d GC'd %lu in %" PRIu64 "ms\n", cur->thread_nr, count, end - now); } static void maybe_garbage_collect(struct ncrx_worker *cur) { uint64_t nowgc; if (!cur->gc_int_ms) return; nowgc = now_mono_ms() / cur->gc_int_ms; if (nowgc > cur->lastgc) { try_to_garbage_collect(cur); cur->lastgc = nowgc; } } static void schedule_ncrx_callback(struct ncrx_worker *cur, struct bucket *bkt, uint64_t when) { struct timerlist *tgtlist; uint64_t now; if (when == UINT64_MAX) { /* * No callback needed. If we had one we no longer need it, so * just remove ourselves from the timerlist. */ if (!timerlist_empty(&bkt->timernode)) timerlist_del(&bkt->timernode); return; } /* * Never queue messages outside the current window. This clamp() is what * guarantees that the callbacks in the timerlists are strictly ordered * from least to most recent: at any given moment only one callback time * corresponds to each bucket, and time cannot go backwards. */ now = now_mono_ms(); when = clamp(when, now + 1, now + NETCONS_RTO); /* * If the bucket is already on a timerlist, we only requeue it if the * callback needs to happen earlier than the one currently queued. */ if (!timerlist_empty(&bkt->timernode)) { if (when > bkt->timernode.when) return; timerlist_del(&bkt->timernode); } tgtlist = &cur->tlist[when % NETCONS_RTO]; fatal_on(when < timerlist_peek(tgtlist), "Timerlist ordering broken\n"); bkt->timernode.when = when; timerlist_append(&bkt->timernode, tgtlist); maybe_update_wake(cur, when); } /* * Read any pending messages out of the bucket, and invoke the output pipeline * with the extended metadata. 
*/ static void drain_bucket_ncrx(struct ncrx_worker *cur, struct bucket *bkt) { struct ncrx_msg *out; uint64_t when; while ((out = ncrx_next_msg(bkt->ncrx))) { execute_output_pipeline(cur->thread_nr, &bkt->src, NULL, out); free(out); } when = ncrx_invoke_process_at(bkt->ncrx); schedule_ncrx_callback(cur, bkt, when); } /* * Execute callbacks for a specific timerlist, until either the list is empty or * we reach an entry that was queued for a time in the future. */ static void do_ncrx_callbacks(struct ncrx_worker *cur, struct timerlist *list) { uint64_t now = now_mono_ms(); struct timerlist *tnode, *tmp; struct bucket *bkt; timerlist_for_each(tnode, tmp, list) { if (tnode->when > now) break; /* * Remove the bucket from the list first, since it might end up * being re-added to another timerlist by drain_bucket_ncrx(). */ timerlist_del(tnode); bkt = bucket_from_timernode(tnode); ncrx_process(NULL, now, 0, bkt->ncrx); drain_bucket_ncrx(cur, bkt); } } /* * We have no idea how large the queue we just processed was: it could have * taken tens of seconds. So we must handle wraparound in the tlist array. */ static uint64_t run_ncrx_callbacks(struct ncrx_worker *cur, uint64_t lastrun) { uint64_t i, now = now_mono_ms(); if (now == lastrun) goto out; fatal_on(now < lastrun, "Time went backwards\n"); /* * It's possible we wrapped: in that case, we simply iterate over the * entire wheel and drain each list until we hit a callback after now. * Otherwise, we only iterate over the buckets that lie on [last,now]. 
*/ for (i = max(lastrun, now - NETCONS_RTO + 1); i <= now; i++) do_ncrx_callbacks(cur, &cur->tlist[i % NETCONS_RTO]); out: return now; } static void consume_msgbuf(struct ncrx_worker *cur, struct msg_buf *buf) { struct bucket *ncrx_bucket; ncrx_bucket = hlookup(cur->ht, &buf->src.sin6_addr); if (!ncrx_bucket->ncrx) { ncrx_bucket->ncrx = ncrx_create(&ncrx_param); timerlist_init(&ncrx_bucket->timernode); memcpy(&ncrx_bucket->src, &buf->src.sin6_addr, sizeof(ncrx_bucket->src)); cur->ht->load++; } ncrx_bucket->last_seen = buf->rcv_time; buf->buf[buf->rcv_bytes] = '\0'; if (!ncrx_process(buf->buf, now_mono_ms(), buf->rcv_time, ncrx_bucket->ncrx)) { drain_bucket_ncrx(cur, ncrx_bucket); return; } execute_output_pipeline(cur->thread_nr, &ncrx_bucket->src, buf, NULL); } static struct msg_buf *grab_prequeue(struct ncrx_worker *cur) { struct msg_buf *ret; assert_pthread_mutex_locked(&cur->queuelock); ret = cur->queue_head; cur->queue_head = NULL; return ret; } void *ncrx_worker_thread(void *arg) { struct ncrx_worker *cur = arg; struct msg_buf *curbuf, *tmp; uint64_t lastrun = now_mono_ms(); int nr_dequeued; cur->ht = create_hashtable(16, NULL); cur->tlist = create_timerlists(); reset_waketime(cur); pthread_mutex_lock(&cur->queuelock); while (!cur->stop) { pthread_cond_timedwait(&cur->cond, &cur->queuelock, next_waketime(cur)); reset_waketime(cur); morework: curbuf = grab_prequeue(cur); nr_dequeued = cur->nr_queued; cur->nr_queued = 0; pthread_mutex_unlock(&cur->queuelock); maybe_resize_hashtable(cur, nr_dequeued); while ((tmp = curbuf)) { consume_msgbuf(cur, curbuf); curbuf = curbuf->next; free(tmp); cur->processed++; } if (!cur->stop) { maybe_garbage_collect(cur); lastrun = run_ncrx_callbacks(cur, lastrun); } pthread_mutex_lock(&cur->queuelock); if (cur->queue_head) goto morework; } assert_pthread_mutex_locked(&cur->queuelock); fatal_on(cur->queue_head != NULL, "Worker queue not empty at exit\n"); cur->hosts_seen = cur->ht->load; destroy_timerlists(cur->tlist); 
destroy_hashtable(cur->ht); return NULL; }