smc-tools-1.2.2/0000755000175000017500000000000013554320160012457 5ustar rasplrasplsmc-tools-1.2.2/README.smctools0000644000175000017500000001044413554320160015204 0ustar rasplrasplSMC Tools (1.2.2) =================== The SMC Tools provided in this package allow execution of existing TCP applications over RoCE network without need to make changes in them. In addition tools are provided to display SMC socket information. This package consists of the tools: - libsmc-preload.so : preload library. - smc_run : preload library environment setup script. - smc_pnet : C program for PNET Table handling - smcss : C program for displaying the information about active SMC sockets. - smc_rnics : List available RDMA NICs The preload shared library libsmc-preload.so provides mapping of TCP socket operations to SMC sockets. The environment setup script smc_run sets up the preload environment for the libsmc-preload.so shared library before starting application. The smcss program is used to gather and display information about the SMC sockets. The smc_pnet program is used to create, destroy, and change the SMC-R PNET table. In addition the package contains the AF_SMC manpage ("man af_smc"). Bug Reports =========== See section 'Code Contributions'. Code Contributions ================== Code contributions will not be accepted for smctools. Therefore, please do not send DIFFs or code-snippets. Thank you! If you want to report bugs or suggest enhancements, please contact: linux390@de.ibm.com and put "[smctools]" as the first word in the subject line of your mail. For bug reports, at a minimum describe the scenario with instructions on how to reproduce. For enhancements, please describe the proposed change and its benefits. 
Release History: ================ 1.2.2 (2019-10-24) Changes: - Add bash autocompletion support Bug fixes: - smcss: Do not show connection mode for already closed sockets - smc_rnics: Set interface to "n/a" for ISM devices 1.2.1 (2019-04-15) Bug fixes: - smc_rnics: Install man page on s390 only - smc-preload: Handle behavior flags in type argument to socket() call - Makefile: Fixed install target on Ubuntu for platforms other than s390 - smc_pnet: Changes in support of kernel 5.1 1.2.0 (2019-02-08) Changes: - smc_rnics: Initial version added - smc_dbg: Initial version added Bug fixes: - smcss: Parse address family of ip address 1.1.0 (2018-06-29) Changes: - smcss: Add IPv6 support - preload: Add IPv6 support - smcss: Output format changed - preload: Rename preload library into libsmc-preload.so - Makefile: Improve distro compatibility - Makefile: Add SONAME to shared libraries - Makefile: Do not strip binaries on install - Makefile: Use LDFLAGS to allow addition of externally set link flags - libsmc..: Remove hardcoded reference to libc - Manpages: Formatting changes Bug fixes: - Makefile: Fix target 'install' dependencies - smcss: Fix "--version" output - smc_pnet: Fix "--version" output - smc_run: Append preload library to LD_PRELOAD instead of potentially overwriting pre-set values - preload: Set suid flag to work with suid executables 1.0.0 (2017-02-13) The initial version Parts: ====== The SMC Tools package includes the following files: Makefile smctools Makefile smc-preload.c libsmc-preload.so, SMC preload library source code smc_run SMC preload library environment setup script smc_pnet.c PNET Table handling source code smcss.c SMC socket display source code smctools_common.h common definitions for smctools smc.h include file from kernel include/uapi/linux/smc.h smc_diag.h include file from kernel include/uapi/linux/smc_diag.h af_smc.7 manpage for AF_SMC sockets smc_pnet.8 manpage for smc_pnet smcss.8 manpage for smcss smc_run.8 manpage for preload library 
usage smc_dbg collect debugging data Requirements: ============= - To compile and run applications over a RoCE network, PCI support, Mellanox Connect-X device support, Infiniband, and SMC support, must be enabled in the kernel. - To run the applications over a RoCE network, the RoCE network must be properly configured. Copyright IBM Corp. 2016, 2017 THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THE ENCLOSED ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. smc-tools-1.2.2/smc_rnics0000755000175000017500000001015613554320160014370 0ustar rasplraspl#!/bin/bash # Copyright IBM Corp. 2018 function usage() { echo; echo "Usage: smc_rnics [ OPTIONS ] [ FID ]"; echo; echo "List all available RNICs"; echo; echo " -d, --disable disable the specified FID"; echo " -e, --enable enable the specified FID"; echo " -h, --help display this message"; echo " -r, --rawids display 'type' as raw vendor/device IDs"; echo " -v, --version display version info"; echo; } function print_rnic() { if [ $header -eq 0 ]; then printf "FID Power PCI ID PCHID Type Port PNET ID Interface\n"; echo '------------------------------------------------------------------------------------'; header=1; fi printf "%3x %-5s %-12s %-4s %-14s %-4s %-16s %s\n" "$((16#$fid))" "$power" "$addr" "$pchid" "$dev_type" "$port" "$pnet" "$int"; (( printed++ )); } function format_fid() { res="${1#0x}"; if [[ "$res" =~ ^([:xdigit:])+ ]]; then printf "Error: '%s' is not a valid FID\n" "$res" >&2; exit 3; fi res="`printf "%08x" $((16#$res))`"; } if [ "`uname -m`" != "s390x" ] && [ "`uname -m`" != "s390" ]; then printf "Error: s390/s390x supported only\n" >&2; exit 1; fi args=`getopt -u -o hrve:d: -l enable:,disable:,help,rawids,version -- $*`; [ $? 
-ne 0 ] && exit 2; set -- $args; action="print"; rawIDs=0; target=""; printed=0; while [ $# -gt 0 ]; do case $1 in "-e" | "--enable" ) action="enable"; fid=$2; shift;; "-d" | "--disable" ) action="disable"; fid=$2; shift;; "-h" | "--help" ) usage; exit 0;; "-r" | "--rawids" ) rawIDs=1;; "-v" | "--version" ) echo "smc_rnics utility, smc-tools-1.2.2 (4335826)"; exit 0;; "--" ) ;; * ) format_fid "$1"; target="$res"; esac shift; done if [ "$action" != "print" ]; then if [ "$target" != "" ]; then usage; exit 4; fi format_fid "$fid"; fid="$res"; ufid=`printf "%x" $((16#$fid))`; # representation without leading zeros if [ ! -d /sys/bus/pci/slots/$fid ]; then echo "Error: FID $ufid does not exist" >&2; exit 5; fi power=`cat /sys/bus/pci/slots/$fid/power 2>/dev/null`; val=0; [ "$action" == "enable" ] && val=1; if [ $power -eq $val ]; then echo "Error: FID $ufid is already ${action}d" >&2; exit 6; fi echo $val > /sys/bus/pci/slots/$fid/power 2>/dev/null; if [ $? -ne 0 ]; then echo "Error: Failed to $action FID $ufid" >&2; exit 7; fi exit 0; fi header=0; # iterate over slots, as powered-off devices won't show elsewhere for fid in `ls -1 /sys/bus/pci/slots`; do cd /sys/bus/pci/slots/$fid; fid="$fid"; if [ "$target" != "" ] && [ "$fid" != "$target" ]; then continue; fi power=`cat power`; interfaces=""; port=""; addr=""; int=""; if [ $power -eq 0 ]; then # device not yet hotplugged dev_type=""; pchid=""; pnet=""; print_rnic; continue; fi # device is hotplugged - locate it for dev in `ls -1 /sys/bus/pci/devices`; do cd /sys/bus/pci/devices/$dev; if [ "`cat function_id`" == "0x$fid" ]; then addr=$dev; break; fi done if [ "$addr" == "" ]; then echo "Error: No matching device found for FID $fid" >&2; continue; fi cd /sys/bus/pci/devices/$addr; id=`cat device`; vend=`cat vendor`; dev_type="${vend#0x}:${id#0x}"; if [ $rawIDs -eq 0 ]; then case "$vend" in "0x1014" ) # IBM case "$id" in "0x04ed") dev_type="ISM"; port="n/a"; int="n/a";; "0x044b") continue;; # zEDC esac;; "0x15b3" ) # 
Mellanox case "$id" in "0x1003" | \ "0x1004") dev_type="RoCE Express";; "0x1016") dev_type="RoCE Express2";; esac;; esac fi pchid="`cat pchid | sed 's/^0x//'`"; pnet="`cat util_string | tr -d '\000' | iconv -f IBM-1047 -t ASCII`"; if [ -d net ]; then interfaces="`ls -1 net`"; fi # one device can have multiple interfaces (one per port) if [ "$interfaces" != "" ]; then pnetids="$pnet"; for int in $interfaces; do cd /sys/bus/pci/devices/$addr/net/$int; if [ -e dev_port ]; then port=`cat dev_port`; (( idx=16*$port+1 )) (( end=$idx+15 )) pnet=`echo "$pnetids" | cut -c $idx-$end`; fi print_rnic; done continue; fi print_rnic; done if [ "$target" != "" ] && [ $printed -eq 0 ]; then exit 8; fi exit 0; smc-tools-1.2.2/smc-tools.autocomplete0000644000175000017500000000305413554320160017024 0ustar rasplrasplfunction _smc_pnet_complete_() { case "${COMP_WORDS[COMP_CWORD-1]}" in --interface*|-I*) COMPREPLY=($(compgen -W "$(ls -1 /sys/class/net/)" -- "${COMP_WORDS[COMP_CWORD]}")) return;; --ibdevice*|-D*) which smc_rnics >/dev/null if [ $? 
-eq 0 ]; then COMPREPLY=($(compgen -W "$(smc_rnics | tail -n +3 | awk '{print($3)}' | uniq)" -- "${COMP_WORDS[COMP_CWORD]}")) else COMPREPLY=($(compgen -W "$(ls -1 /sys/bus/pci/devices)" -- "${COMP_WORDS[COMP_CWORD]}")) fi return;; --ibport*|-P*) ;; esac COMPREPLY=($(compgen -W "--help --version --add --delete --show --flush --interface --ibdevice --ibport" -- "${COMP_WORDS[COMP_CWORD]}")) } function _smc_rnics_complete_() { case "${COMP_WORDS[COMP_CWORD-1]}" in --enable|-e) COMPREPLY=($(compgen -W "$(smc_rnics | grep -e "^ [[:space:]0-9a-f]\{2\} 0" | awk '{print($1)}')" -- "${COMP_WORDS[COMP_CWORD]}")) return;; --disable|-d) COMPREPLY=($(compgen -W "$(smc_rnics | grep -e "^ [[:space:]0-9a-f]\{2\} 1" | awk '{print($1)}')" -- "${COMP_WORDS[COMP_CWORD]}")) return;; esac COMPREPLY=($(compgen -W "--help --version --disable --enable --rawids" -- "${COMP_WORDS[COMP_CWORD]}")) } complete -W "--help --tgz --version" smc_dbg complete -W "--help --version --all --listening --debug --wide --smcd --smcr" smcss complete -F _smc_pnet_complete_ smc_pnet complete -F _smc_rnics_complete_ smc_rnics smc-tools-1.2.2/af_smc.70000644000175000017500000000776113554320160014012 0ustar rasplraspl.\" .\" Copyright IBM Corp. 
2016, 2018 .\" Author(s): Ursula Braun .\" Thomas Richter .\" ---------------------------------------------------------------------- .TH AF_SMC 7 "January 2017" "smc-tools" "Linux Programmer's Manual" .SH NAME AF_SMC - Sockets for SMC communication .SH SYNOPSIS .B #include .sp .B "#define AF_SMC 43" .sp .B "#define SMCPROTO_SMC 0" .sp .B "#define SMCPROTO_SMC6 1" .PP .IB tcp_sockfd " = socket(" AF_SMC ", " SOCK_STREAM ", " SMCPROTO_SMC ); .sp .IB tcp_sockfd " = socket(" AF_SMC ", " SOCK_STREAM ", " SMCPROTO_SMC6 ); .SH DESCRIPTION .I Shared Memory Communication via RDMA (SMC) is a socket over the RDMA communication protocol that allows existing TCP socket applications to transparently benefit from RDMA when exchanging data over an .I RDMA over Converged Ethernet (RoCE) network. Those networks are not routable. SMC provides host-to-host direct memory access without traditional TCP/IP processing overhead. SMC offers preservation of existing IP topology and IP security, and introduces minimal administrative and operational changes. The exploitation of SMC is transparent to TCP socket applications. .PP The new address family .B AF_SMC supports the SMC protocol on Linux. It keeps the address format of .B AF_INET and .B AF_INET6 sockets and supports streaming socket types only. .SS Usage modes Two usage modes are possible: .IP "AF_SMC native usage" uses the socket domain .B AF_SMC instead of .B AF_INET and .BR AF_INET6 . Specify .B SMCPROTO_SMC for .B AF_INET compatible socket semantics, and .B SMCPROTO_SMC6 for .B AF_INET6 respectively. .IP "Usage of AF_INET socket applications with SMC preload library" converts .B AF_INET and .B AF_INET6 sockets to .B AF_SMC sockets. The SMC preload library is part of the SMC tools package. .PP SMC socket capabilities are negotiated at connection setup. If one peer is not SMC capable, further socket processing falls back to TCP usage automatically.
.SS Implementation details: Links and Link Groups To run RDMA traffic to a peer, a so-called .I link is established between a local RoCE card and a remote RoCE card. To enhance availability, you can configure alternate links with automatic fail over. Primary and backup links to a certain peer are combined in a so-called .I link .IR group . .SS RoCE adapter mapping: Creation of a pnet table The SMC protocol requires grouping of multiple physical networks - standard Ethernet and RoCE networks. Such groups are called .I Physical Networks (PNets). For SMC, RoCE adapter mapping is configured within a table called .BR "pnet table" . Any available Ethernet interface can be combined with available .I RDMA-capable network interface cards (RNICs), if they belong to the same Converged Ethernet fabric. To configure RoCE Adapter mapping, you must create a pnet table. Modify the table with the smc-tools command .IR smc_pnet . .PP For details see .BR smc_pnet (8). .SS Displaying SMC socket state information SMC socket state information can be obtained with the smc-tools command .IR smcss . For details see .BR smcss (8). .SS Starting a TCP application to work with SMC To use an existing TCP application to work with SMC, use the SMC preload library. The SMC Tools package provides the command .I smc_run to convert .B AF_INET and .B AF_INET6 socket calls to .B AF_SMC socket calls by means of the preload technique. For more information about the preload mechanism, see also .BR ld.so (8). .PP Example: .IP .B smc_run ftp .PP This command-line example starts an FTP client over SMC. .PP .SS MTU and Infiniband data transfer Infiniband traffic may use MTU values 256, 512, 1024, 2048, or 4096. SMC determines the configured MTU size of the RoCE Ethernet port, announces this MTU size to the peer during connection start, and chooses the minimum MTU size of both peers. 
.SH "SEE ALSO" .BR socket (2), .BR ip (7), .BR tcp (7), .BR socket (7), .BR smc_run (8), .BR smcss (8), .BR smc_pnet (8) .SH "HISTORY" .TP .B AF_SMC, version 1.0.0 .RS 4 .IP "\(bu" 2 Initial version. .RE smc-tools-1.2.2/smc_rnics.80000644000175000017500000000346513554320160014540 0ustar rasplraspl.\" Copyright IBM Corp. 2018 .TH SMC_RNICS 8 "October 2018" "smc-tools" "Linux Programmer's Manual" .SH NAME smc_rnics \- list, enable and disable (R)NICS as used by SMC-R and SMC-D. .SH SYNOPSIS .B smc_rnics .RB [ \-hrv ] .RB [ \-d .IR FID ] .RB [ \-e .IR FID ] .RI [ FID ] .SH DESCRIPTION The SMC protocol requires an (R)NIC for the (R)DMA traffic. Use .B smc_rnics to handle, and to identify hotplugged (R)NICs. .SH OPTIONS By default, .B smc_rnics shows all available (R)NICs in the system. .TP .I FID Limit output to the specified function ID .RI ( FID ). .TP .BR "\-d, \-\-disable " \fIFID Set (R)NIC .I FID offline. .TP .BR "\-e, \-\-enable " \fIFID Set (R)NIC .I FID online. .TP .BR "\-h, \-\-help" Display a brief .B smc_rnics usage information. .TP .BR "\-r, \-\-rawids" Display raw PCI vendor and device codes in column .I Type . . .TP .BR "\-v, \-\-version" Display version information. .SH OUTPUT .SS "FID" Function ID. .SS "Power" Indicates whether the PCI slot is on (1) or off (0). See option .B -e on how to enable a device that is offline. .SS "PCI ID" PCI ID in BDF (Bus:Device.Function) notation. .SS "PCHID" Physical channel identifier, or virtual channel identifier (VCHID) for ISM devices. .SS "Type" Device type in human readable form. See option .B -r to switch to display of PCI vendor and device instead. .SS "Port" Corresponding port of an RNIC, if applicable. .SS "PNET ID" Physical network ID, if defined in IOCDS. .SS "Interface" Network interface in Linux, if applicable. .SH RETURN CODES On success, .B smc_rnics returns 0. If an error occurs, a return code other than 0 is returned. .P .SH AUTHOR .nf Copyright IBM Corp.
2018 Published under the terms and conditions of the EPL (Eclipse Public License). .fi .SH SEE ALSO .BR af_smc (7), .BR smc_pnet (8), .BR smc_run (8), .BR smcss (8) smc-tools-1.2.2/smc.h0000644000175000017500000000132713554320160013415 0ustar rasplraspl/* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Definitions for generic netlink based configuration of an SMC-R PNET table * * Copyright IBM Corp. 2017 * * Author(s): Thomas Richter */ #ifndef _UAPI_LINUX_SMC_H_ #define _UAPI_LINUX_SMC_H_ /* Netlink SMC_PNETID attributes */ enum { SMC_PNETID_UNSPEC, SMC_PNETID_NAME, SMC_PNETID_ETHNAME, SMC_PNETID_IBNAME, SMC_PNETID_IBPORT, __SMC_PNETID_MAX, SMC_PNETID_MAX = __SMC_PNETID_MAX - 1 }; enum { /* SMC PNET Table commands */ SMC_PNETID_GET = 1, SMC_PNETID_ADD, SMC_PNETID_DEL, SMC_PNETID_FLUSH }; #define SMCR_GENL_FAMILY_NAME "SMC_PNETID" #define SMCR_GENL_FAMILY_VERSION 1 #endif /* _UAPI_LINUX_SMC_H */ smc-tools-1.2.2/smc-preload.c0000644000175000017500000000466213554320160015041 0ustar rasplraspl/* * SMC Tools - Shared Memory Communication Tools * * Copyright IBM Corp. 2016, 2018 * * All rights reserved. 
This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define DLOPEN_FLAG RTLD_LAZY #ifndef AF_SMC #define AF_SMC 43 #endif #ifndef SMCPROTO_SMC #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ #endif int (*orig_socket)(int domain, int type, int protocol); static void *dl_handle = NULL; static void initialize(void); static int debug_mode = 0; #define GET_FUNC(x) \ if (dl_handle) { \ char *err; \ dlerror(); \ orig_ ## x=dlsym(dl_handle,#x); \ if ((!orig_ ## x)&&(err=dlerror())) { \ fprintf(stderr, "dlsym failed on " #x ": %s\n",err); \ orig_ ## x=&emergency_ ## x; \ } \ } else { \ orig_ ## x=&emergency_ ## x; \ } static void dbg_msg(FILE *f, const char *format, ...) 
{ va_list vl; if (debug_mode) { va_start(vl, format); vfprintf(f, format, vl); va_end(vl); } } static int emergency_socket(int domain, int type, int protocol) { errno = EINVAL; return -1; } int socket(int domain, int type, int protocol) { int rc; if (!dl_handle) initialize(); /* check if socket is eligible for AF_SMC */ if ((domain == AF_INET || domain == AF_INET6) && // see kernel code, include/linux/net.h, SOCK_TYPE_MASK (type & 0xf) == SOCK_STREAM && (protocol == IPPROTO_IP || protocol == IPPROTO_TCP)) { dbg_msg(stderr, "libsmc-preload: map sock to AF_SMC\n"); if (domain == AF_INET) protocol = SMCPROTO_SMC; else /* AF_INET6 */ protocol = SMCPROTO_SMC6; domain = AF_SMC; } rc = (*orig_socket)(domain, type, protocol); return rc; } static void set_debug_mode(const char *var_name) { char *var_value; var_value = getenv(var_name); debug_mode = 0; if (var_value != NULL) debug_mode = (var_value[0] != '0'); } static void initialize(void) { set_debug_mode("SMC_DEBUG"); dl_handle = dlopen(LIBC_SO, DLOPEN_FLAG); if (!dl_handle) dbg_msg(stderr, "dlopen failed: %s\n", dlerror()); GET_FUNC(socket); } smc-tools-1.2.2/smc_pnet.c0000644000175000017500000002075513554320160014444 0ustar rasplraspl/* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Copyright IBM Corp. 2017 * * Author(s): Thomas Richter * * User space program for SMC-R PNET Table manipulation with generic netlink. * * All rights reserved. 
This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "smctools_common.h" #include "smc.h" static char *progname; static struct pnetentry { char *pnetid; /* Pnetid */ char *ethname; /* Ethernet device name */ char *ibname; /* Infiniband/ISM device name */ int ibport; /* Infiniband device port number */ unsigned char cmd; /* Command to execute */ } pnetcmd; static void _usage(FILE *dest) { fprintf(dest, "Usage: %s [ OPTIONS ] [pnetid]\n" "\t-h, --help this message\n" "\t-V, --version show version information\n" "\t-a, --add add a pnetid entry, requires interface or ib/ism device\n" "\t-d, --delete delete a pnetid entry\n" "\t-s, --show show a pnetid entry\n" "\t-f, --flush flush the complete pnet table\n" "\t-I, --interface Ethernet interface name of a pnetid entry\n" "\t-D, --ibdevice Infiniband/ISM device name of a pnetid entry\n" "\t-P, --ibport Infiniband device port (default: 1)\n" "\t\n" "\tno OPTIONS show complete pnet table\n", progname); } static void help(void) __attribute__((noreturn)); static void help(void) { _usage(stdout); exit(EXIT_SUCCESS); } static void usage(void) __attribute__((noreturn)); static void usage(void) { _usage(stderr); exit(EXIT_FAILURE); } static int convert(char *string) { unsigned long no; char *endp; no = strtoul(string, &endp, 0); if (*endp != '\0' || no > 2) { fprintf(stderr, "%s invalid ib port:%s\n", progname, string); usage(); } return no; } static const struct option long_opts[] = { { "interface", 1, 0, 'I' }, { "ibdevice", 1, 0, 'D' }, { "ibport", 1, 0, 'P' }, { "flush", 0, 0, 'f' }, { "add", 0, 0, 'a'}, { "show", 0, 0, 's'}, { "delete", 0, 0, 'd'}, { "version", 0, 0, 'V' }, { "help", 0, 0, 'h' }, { 
NULL, 0, NULL, 0} }; static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_STRING, .maxlen = 17 }, [SMC_PNETID_ETHNAME] = { .type = NLA_STRING, .maxlen = 16 }, [SMC_PNETID_IBNAME] = { .type = NLA_STRING, .maxlen = 64 }, [SMC_PNETID_IBPORT] = { .type = NLA_U8, .maxlen = 1 } }; /* Netlink library call back handler to be called on data reception. */ static int cb_handler(struct nl_msg *msg, void *arg) { struct nlattr *attrs[SMC_PNETID_MAX + 1]; struct nlmsghdr *hdr = nlmsg_hdr(msg); if (genlmsg_validate(hdr, 0, SMC_PNETID_MAX, smc_pnet_policy) || genlmsg_parse(hdr, 0, attrs, SMC_PNETID_MAX, smc_pnet_policy) < 0) { fprintf(stderr, "%s: invalid data returned\n", progname); nl_msg_dump(msg, stderr); return NL_STOP; } printf("%s %s %s %d\n", nla_get_string(attrs[SMC_PNETID_NAME]), nla_get_string(attrs[SMC_PNETID_ETHNAME]), nla_get_string(attrs[SMC_PNETID_IBNAME]), nla_get_u8(attrs[SMC_PNETID_IBPORT])); return NL_OK; } static int genl_command(void) { int rc = EXIT_FAILURE, id, nlmsg_flags = 0; struct nl_sock *sk; struct nl_msg *msg; /* Allocate a netlink socket and connect to it */ sk = nl_socket_alloc(); if (!sk) { nl_perror(NLE_NOMEM, progname); return rc; } rc = genl_connect(sk); if (rc) { nl_perror(rc, progname); rc = EXIT_FAILURE; goto out1; } id = genl_ctrl_resolve(sk, SMCR_GENL_FAMILY_NAME); if (id < 0) { rc = EXIT_FAILURE; if (id == -NLE_OBJ_NOTFOUND) fprintf(stderr, "%s: SMC module not loaded\n", progname); else nl_perror(id, progname); goto out2; } nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, cb_handler, NULL); /* Allocate a netlink message and set header information. 
*/ msg = nlmsg_alloc(); if (!msg) { nl_perror(NLE_NOMEM, progname); rc = EXIT_FAILURE; goto out2; } if ((pnetcmd.cmd == SMC_PNETID_DEL || pnetcmd.cmd == SMC_PNETID_GET) && !pnetcmd.pnetid) /* List all */ nlmsg_flags = NLM_F_DUMP; if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, id, 0, nlmsg_flags, pnetcmd.cmd, SMCR_GENL_FAMILY_VERSION)) { nl_perror(rc, progname); rc = EXIT_FAILURE; goto out3; } switch (pnetcmd.cmd) { /* Start message construction */ case SMC_PNETID_ADD: if (pnetcmd.ethname) rc = nla_put_string(msg, SMC_PNETID_ETHNAME, pnetcmd.ethname); if (rc < 0) { nl_perror(rc, progname); rc = EXIT_FAILURE; goto out3; } if (pnetcmd.ibname) rc = nla_put_string(msg, SMC_PNETID_IBNAME, pnetcmd.ibname); if (rc < 0) { nl_perror(rc, progname); rc = EXIT_FAILURE; goto out3; } if (pnetcmd.ibname) rc = nla_put_u8(msg, SMC_PNETID_IBPORT, pnetcmd.ibport); if (rc < 0) { nl_perror(rc, progname); rc = EXIT_FAILURE; goto out3; } /* Fall through intended */ case SMC_PNETID_DEL: case SMC_PNETID_GET: if (!pnetcmd.pnetid) /* List all */ break; rc = nla_put_string(msg, SMC_PNETID_NAME, pnetcmd.pnetid); if (rc < 0) { nl_perror(rc, progname); rc = EXIT_FAILURE; goto out3; } } /* Send message */ rc = nl_send_auto(sk, msg); if (rc < 0) { nl_perror(rc, progname); rc = EXIT_FAILURE; goto out3; } /* Receive reply message, returns number of cb invocations. */ rc = nl_recvmsgs_default(sk); /* Kernel commit a9d8b0b1e3d689346b016316bd91980d60c6885d * introduced a misbehavior that a FLUSH of an empty table * returned -ENOENT. Fix it in smc-tools as long as kernel patch did'nt * land in the distros. */ if (pnetcmd.cmd == SMC_PNETID_FLUSH && rc != -NLE_OBJ_NOTFOUND) rc = 0; if (rc < 0) { nl_perror(rc, progname); rc = EXIT_FAILURE; goto out3; } rc = EXIT_SUCCESS; out3: nlmsg_free(msg); out2: nl_close(sk); out1: nl_socket_free(sk); return rc; } int main(int argc, char **argv) { char *slash; int rc, ch; progname = (slash = strrchr(argv[0], '/')) ? 
slash + 1 : argv[0]; while ((ch = getopt_long(argc, argv, "I:D:P:fasdhvV", long_opts, NULL )) != EOF) { switch (ch) { case 'f': if (pnetcmd.cmd) usage(); pnetcmd.cmd = SMC_PNETID_FLUSH; break; case 's': if (pnetcmd.cmd) usage(); pnetcmd.cmd = SMC_PNETID_GET; pnetcmd.pnetid = optarg; break; case 'd': if (pnetcmd.cmd) usage(); pnetcmd.cmd = SMC_PNETID_DEL; pnetcmd.pnetid = optarg; break; case 'a': if (pnetcmd.cmd) usage(); pnetcmd.cmd = SMC_PNETID_ADD; pnetcmd.pnetid = optarg; break; case 'I': pnetcmd.ethname = optarg; break; case 'D': pnetcmd.ibname = optarg; break; case 'P': pnetcmd.ibport = convert(optarg); break; case 'v': case 'V': printf("smc_pnet utility, smc-tools-%s (%s)\n", RELEASE_STRING, RELEASE_LEVEL); exit(0); case 'h': help(); case '?': default: usage(); } } if (optind + 1 < argc) { fprintf(stderr, "%s too many parameters\n", progname); usage(); } if (optind + 1 == argc) pnetcmd.pnetid = argv[optind]; if (!pnetcmd.cmd) { if (optind < argc) { fprintf(stderr, "%s: parameters without option\n", progname); usage(); } pnetcmd.cmd = SMC_PNETID_GET; } if (pnetcmd.cmd == SMC_PNETID_FLUSH) { if (optind < argc) { fprintf(stderr, "%s: -f takes no parameters\n", progname); usage(); } } if (pnetcmd.cmd == SMC_PNETID_ADD) { if (!pnetcmd.ethname && !pnetcmd.ibname) { fprintf(stderr, "%s: interface or device missing\n", progname); usage(); } if (!pnetcmd.ibport) pnetcmd.ibport = 1; } if (pnetcmd.cmd == SMC_PNETID_GET || pnetcmd.cmd == SMC_PNETID_DEL) { if (pnetcmd.ethname) { fprintf(stderr, "%s: interface %s ignored\n", progname, pnetcmd.ethname); pnetcmd.ethname = NULL; } if (pnetcmd.ibname) { fprintf(stderr, "%s: device %s ignored\n", progname, pnetcmd.ibname); pnetcmd.ibname = NULL; } if (pnetcmd.ibport) { fprintf(stderr, "%s: ibport %d ignored\n", progname, pnetcmd.ibport); pnetcmd.ibport = 0; } } rc = genl_command(); return rc; } smc-tools-1.2.2/smctools_common.h0000644000175000017500000000122313554320160016041 0ustar rasplraspl/* * smc-tools/smctools_common.h * 
* Copyright IBM Corp. 2017 * * Author(s): Ursula Braun (ubraun@linux.ibm.com) * * Copyright IBM Corp. 2017 * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ #ifndef SMCTOOLS_COMMON_H #define SMCTOOLS_COMMON_H #define STRINGIFY_1(x) #x #define STRINGIFY(x) STRINGIFY_1(x) #define RELEASE_STRING STRINGIFY (SMC_TOOLS_RELEASE) #define RELEASE_LEVEL "4335826" #define PF_SMC 43 #endif /* SMCTOOLS_COMMON_H */ smc-tools-1.2.2/smc_run0000755000175000017500000000201213554320160014046 0ustar rasplraspl#!/bin/bash # # SMC Tools - Shared Memory Communication Tools # # Copyright IBM Corp. 2017, 2018 # # All rights reserved. This program and the accompanying materials # are made available under the terms of the Eclipse Public License v1.0 # which accompanies this distribution, and is available at # http://www.eclipse.org/legal/epl-v10.html # LIB_NAME="libsmc-preload.so" # # Verify command line arguments and specify the preload library debug mode # if necessary. # command_line=$@ debug_option=$(echo "$command_line" | cut -f 1 -d " "); SMC_DEBUG=0; while getopts ":d" opt; do case $opt in d) SMC_DEBUG=1; command_line=${command_line#"$debug_option"}; command_line=${command_line##\ }; ;; \?) echo "`basename "$0"`: Error: Invalid option: -$OPTARG"; exit 1; ;; esac done if [ "x$command_line" = "x" ]; then echo "`basename "$0"`: Error: Missing command parameter"; exit 1; fi export SMC_DEBUG; # # Execute the specified command. # export LD_PRELOAD=$LD_PRELOAD:$LIB_NAME; exec $command_line exit $?; smc-tools-1.2.2/smc_dbg0000755000175000017500000000371213554320160014006 0ustar rasplraspl#!/bin/bash # Copyright IBM Corp. 
2019 function usage() { echo; echo "Usage: smc_dbg [ OPTIONS ]"; echo; echo "Collect debug information"; echo; echo " -h, --help display this message"; echo " -t, --tgz generate .tgz file"; echo " -v, --version display version info"; echo; } function redirect() { if [ "$tgz" == "on" ]; then exec &>$tmpdir/$1; else echo; fi } tgz="off"; ARCH=`uname -m | cut -c1-4`; args=`getopt -u -o hvt -l help,version,tgz -- $*`; [ $? -ne 0 ] && exit 1; set -- $args; while [ $# -gt 0 ]; do case $1 in "-h" | "--help" ) usage; exit 0;; "-t" | "--tgz" ) tgz="on";; "-v" | "--version" ) echo "smc_dbg utility, smc-tools-1.2.2 (4335826)"; exit 0;; * ) esac shift; done if [ "$tgz" == "on" ]; then exec 3>&1 4>&2 tmpdir=`mktemp -d /tmp/smc_dbg-XXXXXX`; fi redirect version.txt; smcss -v smc_dbg -v smc_pnet -v smc_rnics -v if [ "$ARCH" == "s390" ]; then redirect devices.txt; echo "CCW Devices:" printf " Device CHPID PNET ID\n"; echo " -------------------------------------------"; for device in `ls -1 /sys/bus/ccwgroup/devices`; do chpid=`cat /sys/bus/ccwgroup/devices/$device/chpid | tr [A-F] [a-f]`; if [ -e /sys/devices/css0/chp0.$chpid/util_string ]; then pnetid="`cat /sys/devices/css0/chp0.$chpid/util_string | iconv -f IBM-1047 -t ASCII 2>/dev/null | tr -d '\000' | sed 's/^/ /'`"; else pnetid=""; fi printf " %8s %4s %s\n" $device 0x$chpid $pnetid; done echo; echo "PCI Devices:" smc_rnics | sed 's/^/ /'; redirect smcss_smcd; smcss --smcd; fi redirect smcss_all.txt; smcss --all --debug; redirect smcss_smcr; smcss --smcr; redirect pnet_table.txt; smc_pnet --show; if [ "$tgz" == "on" ]; then exec >&3 2>&4 cd /tmp; tar cvfz $tmpdir.tgz `basename $tmpdir` >/dev/null 2>&1; rm -rf $tmpdir; echo "Debug output written to $tmpdir.tgz"; fi exit 0; smc-tools-1.2.2/smc_pnet.80000644000175000017500000001063113554320160014361 0ustar rasplraspl.\" smc_pnet.8 .\" .\" .\" Copyright IBM Corp. 
2017, 2019 .\" Author(s): Thomas Richter .\" Ursula Braun .\" ---------------------------------------------------------------------- .\" .TH SMC_PNET 8 "January 2017" "smc-tools" "Linux Programmer's Manual" .SH NAME smc_pnet \- create, destroy, and change the SMC PNET table. .SH SYNOPSIS .B smc_pnet { \fB\-a\fR | \fB\-\-add\fR \fI\fR } { \fB\-I\fR | \fB\-\-interface\fR } .P .B smc_pnet { \fB\-a\fR | \fB\-\-add\fR \fI\fR } { \fB\-D\fR | \fB\-\-ibdevice\fR } [ \fB\-P\fR | \fB\-\-ibport\fR ] .P .B smc_pnet { \fB\-a\fR | \fB\-\-add\fR \fI\fR } { \fB\-I\fR | \fB\-\-interface\fR } { \fB\-D\fR | \fB\-\-ibdevice\fR } [ \fB\-P\fR | \fB\-\-ibport\fR ] .P .B smc_pnet { \fB\-s\fR | \fB\-\-show\fR \fI\fR } .P .B smc_pnet { \fB\-d\fR | \fB\-\-delete\fR \fI\fR } .P .B smc_pnet { \fB\-f\fR | \fB\-\-flush\fR } .P .B smc_pnet { \fB\-v\fR | \fB\-\-version\fR } .P .B smc_pnet { \fB\-h\fR | \fB\-\-help\fR } .SH DESCRIPTION The SMC protocol requires grouping of standard Ethernet and RoCE networks or ISM devices. Such groups are called \fIPhysical Networks\fR (PNETs). The mapping is configured within a table called \fIpnet table\fR. Any available Ethernet interface can be combined with an available RDMA-capable network interface card (RNIC) or a DMA-capable ISM device, if they belong to the same Converged Ethernet fabric. To configure mapping of a RoCE Adapter port or an ISM device to a standard Ethernet interface, both devices need to have the same pnetid; either hardware-defined or user-defined using the pnet table. Hardware-defined pnetids cannot be overwritten. .P The .B smc_pnet command configures the pnet table. .SH OPTIONS By default, .B smc_pnet shows all entries of the pnet table. .TP .IR defines a name for a grouping of Ethernet interface and RNICs or ISM devices. A pnetid name consists of up to 16 alphanumeric uppercase characters without blanks. .TP .BR "\-a, \-\-add" creates a new pnetid definition to the pnet table (if it does not already exist). 
Only one pnetid can be defined for a certain Ethernet interface, a certain InfiniBand device port or a certain ISM device. Adding more than one pnetid fails. Hardware-defined pnetids cannot be overwritten. .TP .BR "\-s, \-\-show" shows a certain pnetid definition in the pnet table. .TP .BR "\-d, \-\-delete" deletes an existing pnetid definition from the pnet table. .TP .BR "\-f, \-\-flush" removes all pnetid definitions from the pnet table. .TP .BR "\-I, \-\-interface " specifies the name of the Ethernet interface to be added for a certain pnetid definition. .TP .BR "\-D, \-\-ibdevice " specifies the ID of the InfiniBand device or ISM device. .TP .BR "\-P, \-\-ibport " specifies the port number of the InfiniBand device port. Valid numbers are 1 or 2. The default value is 1. .TP .BR "\-v, \-\-version" displays the smc_pnet program version. .TP .BR "\-h, \-\-help" displays brief smc_pnet usage information. .SH EXAMPLES .B Define pnetid ABC for the ethernet device names encf500 and bond0, and define .B pnetid ABC for the InfiniBand device ID 0001:00:00.0 (port 2) and the ISM .B device ID 0004:00:00.0: .RS 4 .PP .nf $ smc_pnet \-a ABC \-I encf500 $ smc_pnet \-a ABC \-I bond0 $ smc_pnet \-a ABC \-D 0001:00:00.0 \-P 2 $ smc_pnet \-a ABC \-D 0004:00:00.0 .RE .PP . .B Show all pnet table entries: .RS 4 .PP .nf $ smc_pnet ABC encf500 n/a 255 ABC bond0 n/a 255 ABC n/a 0001:00:00.0 2 ABC n/a 0004:00:00.0 1 .RE .PP . .B Define pnetid XYZ for the ethernet interface name vlan0201 and the InfiniBand .B device ID 0001:00:00.0 (port 1): .RS 4 .PP $ smc_pnet \-a XYZ \-I vlan0201 \-D 0001:00:00.0 \-P 1 .RE .PP . .B Show all entries for pnetid XYZ: .RS 4 .PP .nf $ smc_pnet \-s XYZ XYZ vlan0201 n/a 255 XYZ n/a 0001:00:00.0 1 .RE .PP . .B Delete all pnet table entries with pnetid named ABC: .RS 4 .PP $ smc_pnet \-d ABC .RE .PP . .B Delete all entries in the pnet table: .RS 4 .PP $ smc_pnet \-f .RE .PP . . .SH RETURN CODES Successful \fBsmc_pnet\fR commands return 0. 
If an error occurs, \fBsmc_pnet\fR writes a message to stderr and completes with a return code other than 0. .P .SH SEE ALSO .BR af_smc (7), .BR smc_rnics (8), .BR smc_run (8), .BR smcss (8) smc-tools-1.2.2/smc_run.80000644000175000017500000000232613554320160014221 0ustar rasplraspl.\" smc_run.8 .\" .\" .\" Copyright IBM Corp. 2017 .\" Author(s): Ursula Braun .\" ---------------------------------------------------------------------- .\" .TH SMC_RUN 8 "January 2017" "smc-tools" "Linux Programmer's Manual " .SH NAME smc_run \- start a TCP socket program with the capability to use SMC as networking protocol. .SH SYNOPSIS .B smc_run .RB [ \-d ] .I program .I parameters .SH DESCRIPTION .B smc_run starts a .IR program specified as argument with its .IR parameters allowing to use the SMC protocol for program-used TCP socket connections. The script specifies libsmc-preload.so as a preload shared library for the Linux program loader. The preload library libsmc-preload.so intercepts a few TCP socket calls and triggers the equivalent execution through SMC. The following options can be specified: .TP .BR "\-d" Display additional diagnostic messages during the program execution. .SH RETURN CODES On success, the .IR smc_run command returns 0. If an error occurs .IR smc_run writes a message to stdout and completes with a return code other than 0. .TP .B 1 An invalid option was specified. .P .SH SEE ALSO .BR af_smc (7), .BR smc_pnet (8), .BR smc_rnics (8), .BR smcss (8), .BR tcp (7) smc-tools-1.2.2/smcss.80000644000175000017500000001404313554320160013702 0ustar rasplraspl.\" smcss.8 .\" .\" .\" Copyright IBM Corp. 2017, 2018 .\" Author(s): Ursula Braun .\" ---------------------------------------------------------------------- .\" .TH SMCSS 8 "June 2018" "smc-tools" "Linux Programmer's Manual" .SH NAME smcss \- print information about the AF_SMC sockets and link groups. 
.SH SYNOPSIS .B smcss .RB [ \-\-debug | \-d ] .RB [ \-\-smcd | \-D ] .RB [ \-\-wide | \-W ] .P .B smcss .RB { \-\-all | \-a } .RB [ \-\-debug | \-d ] .RB [ \-\-smcd | \-D ] .RB [ \-\-wide | \-W ] .P .B smcss .RB [ \-\-debug | \-d ] .RB [ \-\-smcr | \-R ] .RB [ \-\-wide | \-W ] .P .B smcss .RB { \-\-all | \-a } .RB [ \-\-debug | \-d ] .RB [ \-\-smcr | \-R ] .RB [ \-\-wide | \-W ] .P .B smcss .RB { \-\-listening | \-l } .RB [ \-\-wide | \-W ] .P .B smcss .RB { \-\-version | \-v } .P .B smcss .RB { \-\-help | \-h } .SH DESCRIPTION .B smcss prints information about the Linux AF_SMC sockets and link groups. The type of information printed is controlled by the first argument, as follows: .TP .BR (none) .br displays a list of connecting, closing, or connected SMC sockets with basic information. .TP .BR "\-a, \-\-all" displays all types of SMC sockets: listening, opening, closing, and connected. .TP .BR "\-l, \-\-listening" shows listening sockets only. These are omitted by default. .SH OPTIONS .TP .BR "\-d, \-\-debug" displays additional debug information, such as shutdown state. .TP .BR "\-D, \-\-smcd" displays additional SMC-D specific information. Shows SMC-D sockets only. .TP .BR "\-R, \-\-smcr" displays additional SMC-R specific information. Shows SMC-R sockets only. .TP .BR "\-W, \-\-wide" does not truncate IP addresses. .SH OUTPUT .SS "State" The state of the socket. The state can be one of these values: .TP .I INIT The SMC socket is being initialized. It is neither connected nor listening yet. .TP .I CLOSED The SMC socket is closed. It is neither connected nor listening anymore. .TP .I LISTEN The SMC socket is a listening socket, waiting for incoming connection requests. .TP .I ACTIVE The SMC socket has an established connection. In this state, the TCP connection is fully established, rendezvous processing has been completed, and SMC peers can exchange data via RDMA. .TP .I PEERCLW1 No further data will be sent to the peer. 
.TP .I PEERCLW2 No further data will be sent to or received from the peer. .TP .I APPLCLW1 No further data will be received from the peer. .TP .I APPLCLW2 No further data will be received from or sent to the peer. .TP .I APPLFINCLW The peer has closed the socket. .TP .I PEERFINCLW The socket is closed locally. .TP .I PEERABORTW The socket was abnormally closed locally. .TP .I PROCESSABORT The peer has closed the socket abnormally. .SS "UID" User ID of the SMC socket. .SS "Inode" Inode attribute of the socket. .SS "Local Address" Address and port number of the local end of the SMC socket. The displayed address is truncated when it ends with '..'. .BR "-W, --wide" can be used to display addresses untruncated. .SS "Peer Address" Address and port number of the remote end of the socket. Analogous to "Local Address". .SS "Intf" When the socket is explicitly bound with setsockopt option SO_BINDTODEVICE then Intf shows the interface number of the Ethernet device to which the socket is bound. .SS "Mode" .TP .I SMCD The SMC socket uses SMC-D for data exchange. .TP .I SMCR The SMC socket uses SMC-R for data exchange. .TP .I TCP The SMC socket uses the TCP protocol for data exchange, because an SMC connection could not be established. .SS "ShutD" .TP .I <-> The SMC socket has not been shut down. .TP .I R-> The SMC socket is shut down one-way and cannot receive data. .TP .I <-W The SMC socket is shut down one-way and cannot send data. .TP .I R-W The SMC socket is shut down in both ways and cannot receive or send data. .SS "Token" Unique ID of the SMC socket connection. .SS "Sndbuf" Size of the to-be-sent window of the SMC socket connection. .SS "Rcvbuf" Size of the receiving window of the SMC socket connection (filled by peer). .SS "Peerbuf" Size of the peer receiving window of the SMC socket connection (to fill during RDMA-transfer). .SS "rxprod-Cursor" Describes the current cursor location of the "Rcvbuf" for data to be received from the peer. 
.SS "rxcons-Cursor" Describes the current cursor location of the "Peerbuf" for data sent to peer and confirmed by the peer. .SS "rxFlags" SMC socket connection flags set by and received from the peer. .SS "txprod-Cursor" Describes the current cursor location of the "Peerbuf" for data sent to peer. .SS "txcons-Cursor" Describes the current cursor location of the "Rcvbuf" for data received from the peer and confirmed to the peer. .SS "txFlags" SMC socket connection flags set locally and sent to the peer. .SS "txprep-Cursor" Describes the current cursor location of the "Sndbuf" for data to be sent. The data is to be moved to the "Peerbuf". .SS "txsent-Cursor" Describes the current cursor location of the "Sndbuf" for data sent. The data was moved to the "Peerbuf". .SS "txfin-Cursor" Describes the current cursor location of the "Sndbuf" for data sent and send completion confirmed. The data was moved to the "Peerbuf" and completion was confirmed. .SS "Role" .TP .I CLNT The link group of the SMC socket is used for client connections. .TP .I SERV The link group of the SMC socket is used for server connections. .SS "IB-Device" Name of the RoCE device used by the link group to which the SMC socket belongs. .SS "Port" Port of the RoCE device used by the link group to which the SMC socket belongs. .SS "Linkid" Unique link ID of the link within the link group to which the SMC socket belongs. .SS "GID" Gid of the RoCE port used by the link group to which the SMC socket belongs. .SS "Peer-GID" Gid of the Foreign RoCE port used by the link group to which the SMC socket belongs. .SS "VLAN" tbd. .SH RETURN CODES Successful .IR smcss commands return 0 and display the requested socket state table or link group information. If an error occurs, .IR smcss writes a message to stderr and completes with a return code other than 0. 
.P .SH SEE ALSO .BR af_smc (7), .BR smc_rnics (8), .BR smc_run (8), .BR smc_pnet (8) smc-tools-1.2.2/LICENSE0000644000175000017500000002734713554320160013501 0ustar rasplraspl Eclipse Public License - v 1.0 THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 1. DEFINITIONS "Contribution" means: a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and b) in the case of each subsequent Contributor: i) changes to the Program, and ii) additions to the Program; where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program. "Contributor" means any person or entity that distributes the Program. "Licensed Patents" mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program. "Program" means the Contributions distributed in accordance with this Agreement. "Recipient" means anyone who receives the Program under this Agreement, including all Contributors. 2. GRANT OF RIGHTS a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form. 
b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder. c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program. d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement. 3. 
REQUIREMENTS A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that: a) it complies with the terms and conditions of this Agreement; and b) its license agreement: i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose; ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits; iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange. When the Program is made available in source code form: a) it must be made available under this Agreement; and b) a copy of this Agreement must be included with each copy of the Program. Contributors may not remove or alter any copyright notices contained within the Program. Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution. 4. COMMERCIAL DISTRIBUTION Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. 
Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense. For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages. 5. 
NO WARRANTY EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement , including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations. 6. DISCLAIMER OF LIABILITY EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 7. GENERAL If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. 
If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed. All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive. Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. 
Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved. This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation. smc-tools-1.2.2/Makefile0000644000175000017500000001245713554320160014130 0ustar rasplraspl # # SMC Tools - Shared Memory Communication Tools # # Copyright IBM Corp. 2016, 2018 # # All rights reserved. This program and the accompanying materials # are made available under the terms of the Eclipse Public License v1.0 # which accompanies this distribution, and is available at # http://www.eclipse.org/legal/epl-v10.html # SMC_TOOLS_RELEASE = 1.2.2 VER_MAJOR = $(shell echo $(SMC_TOOLS_RELEASE) | cut -d '.' -f 1) ARCHTYPE = $(shell uname -m) ARCH := $(shell getconf LONG_BIT) DISTRO := $(shell lsb_release -si 2>/dev/null) ifneq ("${V}","1") MAKEFLAGS += --quiet cmd = echo $1$2; else cmd = endif CCC = $(call cmd," CC ",$@)${CC} LINK = $(call cmd," LINK ",$@)${CC} GEN = $(call cmd," GEN ",$@)sed DESTDIR ?= PREFIX = /usr BINDIR = ${PREFIX}/bin MANDIR = ${PREFIX}/share/man BASH_AUTODIR = $(shell pkg-config --variable=completionsdir bash-completion 2>/dev/null) OWNER = $(shell id -un) GROUP = $(shell id -gn) INSTALL_FLAGS_BIN = -g $(GROUP) -o $(OWNER) -m755 INSTALL_FLAGS_MAN = -g $(GROUP) -o $(OWNER) -m644 INSTALL_FLAGS_LIB = -g $(GROUP) -o $(OWNER) -m4755 STUFF_32BIT = 0 # # Check that 31/32-bit build tools are available. 
# ifeq ($(ARCH),64) ifeq ($(DISTRO),Ubuntu) LIBDIR = ${PREFIX}/lib/${ARCHTYPE}-linux-gnu else LIBDIR = ${PREFIX}/lib64 endif ifneq ("$(wildcard ${PREFIX}/include/gnu/stubs-32.h)","") STUFF_32BIT = 1 LIBDIR32 = ${PREFIX}/lib endif else ifeq ($(DISTRO),Ubuntu) LIBDIR = ${PREFIX}/lib/s390-linux-gnu else LIBDIR = ${PREFIX}/lib endif endif all: libsmc-preload.so libsmc-preload32.so smcss smc_pnet CFLAGS ?= -Wall -O3 -g ALL_CFLAGS = -DSMC_TOOLS_RELEASE=$(SMC_TOOLS_RELEASE) $(CFLAGS) ifeq ($(ARCHTYPE),s390x) MACHINE_OPT32="-m31" else MACHINE_OPT32="-m32" endif %: %.in $(GEN) -e "s#x.x.x#$(SMC_TOOLS_RELEASE)#g" < $< > $@ smc-preload.o: smc-preload.c ${CCC} ${CFLAGS} -fPIC -c smc-preload.c libsmc-preload.so: smc-preload.o ${LINK} ${LDFLAGS} -shared smc-preload.o -ldl -Wl,-z,defs,-soname,$@.$(VER_MAJOR) -o $@ chmod u+s $@ libsmc-preload32.so: smc-preload.c ifeq ($(ARCH),64) ifeq ($(STUFF_32BIT),1) ${CCC} ${CFLAGS} -fPIC -c ${MACHINE_OPT32} $< -o smc-preload32.o ${LINK} ${LDFLAGS} -shared smc-preload32.o ${MACHINE_OPT32} -ldl -Wl,-soname,$@.$(VER_MAJOR) -o $@ chmod u+s $@ else $(warning "Warning: Skipping 31/32-bit library build because 31/32-bit build tools") $(warning " are unavailable. SMC will not support 31/32 bit applications") $(warning " unless the glibc devel package for the appropriate addressing") $(warning " mode is installed and the preload libraries are rebuilt.") endif endif ifneq ($(shell sh -c 'command -v pkg-config'),) SMC_PNET_CFLAGS = $(shell pkg-config --silence-errors --cflags libnl-genl-3.0) SMC_PNET_LFLAGS = $(shell pkg-config --silence-errors --libs libnl-genl-3.0) else SMC_PNET_CFLAGS = -I /usr/include/libnl3 SMC_PNET_LFLAGS = -lnl-genl-3 -lnl-3 endif smc_pnet: smc_pnet.c smc.h smctools_common.h @if [ ! 
-e /usr/include/libnl3/netlink/netlink.h ]; then \ printf "**************************************************************\n" >&2; \ printf "* Missing build requirement for: %-45s\n" $@ >&2; \ printf "* Install package..............: %-45s\n" "devel package for libnl3" >&2; \ printf "* Install package..............: %-45s\n" "devel package for libnl3-genl" >&2; \ printf "* NOTE: Package names might differ by platform\n" >&2; \ printf "* On Ubuntu try libnl-3-dev and libnl-genl-3-dev\n" >&2; \ printf "**************************************************************\n" >&2; \ exit 1; \ fi ${CCC} ${ALL_CFLAGS} ${SMC_PNET_CFLAGS} ${LDFLAGS} -o $@ $< ${SMC_PNET_LFLAGS} smcss: smcss.c smc_diag.h smctools_common.h ${CCC} ${ALL_CFLAGS} ${LDFLAGS} $< -o $@ install: all echo " INSTALL" install -d -m755 $(DESTDIR)$(LIBDIR) $(DESTDIR)$(BINDIR) $(DESTDIR)$(MANDIR)/man7 \ $(DESTDIR)$(BASH_AUTODIR) $(DESTDIR)$(MANDIR)/man8 install $(INSTALL_FLAGS_LIB) libsmc-preload.so $(DESTDIR)$(LIBDIR) #ifeq ($(STUFF_32BIT),1) # install -d -m755 $(DESTDIR)$(LIBDIR32) # install $(INSTALL_FLAGS_LIB) libsmc-preload32.so $(DESTDIR)$(LIBDIR32)/libsmc-preload.so #endif install $(INSTALL_FLAGS_BIN) smc_run $(DESTDIR)$(BINDIR) install $(INSTALL_FLAGS_BIN) smcss $(DESTDIR)$(BINDIR) install $(INSTALL_FLAGS_BIN) smc_pnet $(DESTDIR)$(BINDIR) install $(INSTALL_FLAGS_BIN) smc_dbg $(DESTDIR)$(BINDIR) ifeq ($(shell uname -m | cut -c1-4),s390) install $(INSTALL_FLAGS_BIN) smc_rnics $(DESTDIR)$(BINDIR) install $(INSTALL_FLAGS_MAN) smc_rnics.8 $(DESTDIR)$(MANDIR)/man8 endif install $(INSTALL_FLAGS_MAN) af_smc.7 $(DESTDIR)$(MANDIR)/man7 install $(INSTALL_FLAGS_MAN) smc_run.8 $(DESTDIR)$(MANDIR)/man8 install $(INSTALL_FLAGS_MAN) smc_pnet.8 $(DESTDIR)$(MANDIR)/man8 install $(INSTALL_FLAGS_MAN) smcss.8 $(DESTDIR)$(MANDIR)/man8 ifneq ($(BASH_AUTODIR),) install $(INSTALL_FLAGS_MAN) smc-tools.autocomplete $(DESTDIR)$(BASH_AUTODIR)/smc-tools ln -sfr $(DESTDIR)$(BASH_AUTODIR)/smc-tools $(DESTDIR)$(BASH_AUTODIR)/smc_rnics ln 
-sfr $(DESTDIR)$(BASH_AUTODIR)/smc-tools $(DESTDIR)$(BASH_AUTODIR)/smc_dbg ln -sfr $(DESTDIR)$(BASH_AUTODIR)/smc-tools $(DESTDIR)$(BASH_AUTODIR)/smcss ln -sfr $(DESTDIR)$(BASH_AUTODIR)/smc-tools $(DESTDIR)$(BASH_AUTODIR)/smc_pnet endif clean: echo " CLEAN" rm -f *.o *.so smcss smc_pnet smc-tools-1.2.2/smcss.c0000644000175000017500000003526413554320160013765 0ustar rasplraspl/* * SMC Tools - Shared Memory Communication Tools * * Copyright IBM Corp. 2017, 2018 * * Author(s): Ursula Braun * * User space program for SMC Socket display * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "smctools_common.h" #include "smc_diag.h" #define MAGIC_SEQ 123456 #define ADDR_LEN_SHORT 23 struct rtnl_handle { int fd; struct sockaddr_nl local; struct sockaddr_nl peer; __u32 seq; __u32 dump; int proto; FILE *dump_fp; int flags; }; static char *progname; int show_debug; int show_smcr; int show_smcd; int show_wide; int listening = 0; int all = 0; static int rtnl_open(struct rtnl_handle *rth) { socklen_t addr_len; int rcvbuf = 1024 * 1024; int sndbuf = 32768; rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_SOCK_DIAG); if (rth->fd < 0) { perror("Cannot open netlink socket"); return EXIT_FAILURE; } if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf)) < 0) { perror("SO_SNDBUF"); return EXIT_FAILURE; } if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) < 0) { perror("SO_RCVBUF"); return EXIT_FAILURE; } memset(&rth->local, 0, sizeof(rth->local)); rth->local.nl_family = AF_NETLINK; rth->local.nl_groups = 0; if (bind(rth->fd, (struct sockaddr*)&rth->local, sizeof(rth->local)) < 0) { perror("Cannot bind 
netlink socket"); return EXIT_FAILURE; } addr_len = sizeof(rth->local); if (getsockname(rth->fd, (struct sockaddr*)&rth->local, &addr_len) < 0) { perror("Cannot getsockname"); return EXIT_FAILURE; } if (addr_len != sizeof(rth->local)) { fprintf(stderr, "Wrong address length %d\n", addr_len); return EXIT_FAILURE; } if (rth->local.nl_family != AF_NETLINK) { fprintf(stderr, "Wrong address family %d\n", rth->local.nl_family); return EXIT_FAILURE; } rth->seq = time(NULL); return 0; } void rtnl_close(struct rtnl_handle *rth) { if (rth->fd >= 0) { close(rth->fd); rth->fd = -1; } } #define DIAG_REQUEST(_req, _r) \ struct { \ struct nlmsghdr nlh; \ _r; \ } _req = { \ .nlh = { \ .nlmsg_type = SOCK_DIAG_BY_FAMILY, \ .nlmsg_flags = NLM_F_ROOT|NLM_F_REQUEST, \ .nlmsg_seq = MAGIC_SEQ, \ .nlmsg_len = sizeof(_req), \ }, \ } static int sockdiag_send(int fd) { struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; DIAG_REQUEST(req, struct smc_diag_req r); struct msghdr msg; struct iovec iov[1]; int iovlen = 1; memset(&req.r, 0, sizeof(req.r)); req.r.diag_family = PF_SMC; iov[0] = (struct iovec) { .iov_base = &req, .iov_len = sizeof(req) }; msg = (struct msghdr) { .msg_name = (void *)&nladdr, .msg_namelen = sizeof(nladdr), .msg_iov = iov, .msg_iovlen = iovlen, }; if (show_debug) req.r.diag_ext |= (1<<(SMC_DIAG_CONNINFO-1)); if (show_smcr) req.r.diag_ext |= (1<<(SMC_DIAG_LGRINFO-1)); if (show_smcd) req.r.diag_ext |= (1<<(SMC_DIAG_DMBINFO-1)); if (sendmsg(fd, &msg, 0) < 0) { close(fd); return EXIT_FAILURE; } return 0; } static void print_header(void) { printf("State "); printf("UID "); printf("Inode "); printf("Local Address "); printf("Peer Address "); printf("Intf "); printf("Mode "); if (show_debug) { printf("Shutd "); printf("Token "); printf("Sndbuf "); printf("Rcvbuf "); printf("Peerbuf "); printf("rxprod-Cursor "); printf("rxcons-Cursor "); printf("rxFlags "); printf("txprod-Cursor "); printf("txcons-Cursor "); printf("txFlags "); printf("txprep-Cursor "); 
printf("txsent-Cursor "); printf("txfin-Cursor "); } if (show_smcr) { printf("Role "); printf("IB-device "); printf("Port "); printf("Linkid "); printf("GID "); printf("Peer-GID"); } if (show_smcd) { printf("GID "); printf("Token "); printf("Peer-GID "); printf("Peer-Token "); printf("Linkid"); } printf("\n"); } static const char *smc_state(unsigned char x) { static char buf[16]; switch (x) { case 1: return "ACTIVE"; case 2: return "INIT"; case 7: return "CLOSED"; case 10: return "LISTEN"; case 20: return "PEERCLOSEWAIT1"; case 21: return "PEERCLOSEWAIT2"; case 22: return "APPCLOSEWAIT1"; case 23: return "APPCLOSEWAIT2"; case 24: return "APPFINCLOSEWAIT1"; case 25: return "PEERFINCLOSEWAIT"; case 26: return "PEERABORTWAIT"; case 27: return "PROCESSABORT"; default: sprintf(buf, "%#x?", x); return buf; } } static void parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len) { unsigned short type; memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); while (RTA_OK(rta, len)) { type = rta->rta_type; if ((type <= max) && (!tb[type])) tb[type] = rta; rta = RTA_NEXT(rta,len); } if (len) fprintf(stderr, "!!!Deficit %d, rta_len=%d\n", len, rta->rta_len); } /* format one sockaddr / port */ static void addr_format(char *buf, size_t buf_len, size_t short_len, __be32 addr[4], int port) { char *errmsg = "(inet_ntop error)"; /* very unlikely */ char addr_buf[64], port_buf[16]; int addr_len, port_len; int af; /* There was an upstream discussion about the content of the * diag_family field. Originally it was AF_SMC, but was changed with * IPv6 support to indicate AF_INET or AF_INET6. Upstream complained * later that there is no way to separate AF_INET from AF_SMC diag msgs. * We now change back the value of the diag_family field to be always * AF_SMC. We now 'parse' the IP address type. * Note that smc_diag.c in kernel always clears the whole addr field * before the ip address is copied into and we can rely on that here. 
*/ if (addr[1] == 0 && addr[2] == 0 && addr[3] == 0) af = AF_INET; else af = AF_INET6; if (!inet_ntop(af, addr, addr_buf, sizeof(addr_buf))) { strcpy(buf, errmsg); return; } sprintf(port_buf, "%d", port); addr_len = strlen(addr_buf); port_len = strlen(port_buf); if (!show_wide && (addr_len + 1 + port_len > short_len)) { /* truncate addr string */ addr_len = short_len - 1 - port_len - 2; strncpy(buf, addr_buf, addr_len); buf[addr_len] = '\0'; strcat(buf, ".."); /* indicate truncation */ strcat(buf, ":"); strncat(buf, port_buf, port_len); } else { snprintf(buf, buf_len, "%s:%s", addr_buf, port_buf); } } static void show_one_smc_sock(struct nlmsghdr *nlh) { struct smc_diag_msg *r = NLMSG_DATA(nlh); struct rtattr *tb[SMC_DIAG_MAX + 1]; unsigned long long inode; char txtbuf[128]; parse_rtattr(tb, SMC_DIAG_MAX, (struct rtattr *)(r+1), nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); if (listening) { if ( r->diag_state != 10) return; } else { if (!all && (r->diag_state == 10 || r->diag_state == 2)) return; } if (show_smcr && r->diag_mode != SMC_DIAG_MODE_SMCR) return; /* show only SMC-R sockets */ if (show_smcd && r->diag_mode != SMC_DIAG_MODE_SMCD) return; /* show only SMC-D sockets */ printf("%-14s ", smc_state(r->diag_state)); printf("%05d ", r->diag_uid); inode = r->diag_inode; printf("%07llu ", inode); if (r->diag_state == 2) /* INIT state */ goto newline; addr_format(txtbuf, sizeof(txtbuf), ADDR_LEN_SHORT, r->id.idiag_src, ntohs(r->id.idiag_sport)); printf("%-*s ", (int)MAX(ADDR_LEN_SHORT, strlen(txtbuf)), txtbuf); if (r->diag_state == 10) /* LISTEN state */ goto newline; addr_format(txtbuf, sizeof(txtbuf), ADDR_LEN_SHORT, r->id.idiag_dst, ntohs(r->id.idiag_dport)); printf("%-*s ", (int)MAX(ADDR_LEN_SHORT, strlen(txtbuf)), txtbuf); printf("%04x ", r->id.idiag_if); if (r->diag_state == 7) /* CLOSED state */ goto newline; if (r->diag_mode == SMC_DIAG_MODE_FALLBACK_TCP) { printf("TCP "); /* when available print local and peer fallback reason code */ if 
(tb[SMC_DIAG_FALLBACK] && tb[SMC_DIAG_FALLBACK]->rta_len >= sizeof(struct smc_diag_fallback)) { struct smc_diag_fallback fallback; fallback = *(struct smc_diag_fallback *)RTA_DATA(tb[SMC_DIAG_FALLBACK]); printf("0x%08x", fallback.reason); if (fallback.peer_diagnosis) printf("/0x%08x", fallback.peer_diagnosis); } goto newline; } else if (r->diag_mode == SMC_DIAG_MODE_SMCD) printf("%4s ", "SMCD"); else printf("%4s ", "SMCR"); if (show_debug) { if (tb[SMC_DIAG_SHUTDOWN] && tb[SMC_DIAG_SHUTDOWN]->rta_len >= sizeof(__u8)) { unsigned char mask; mask = *(__u8 *)RTA_DATA(tb[SMC_DIAG_SHUTDOWN]); printf(" %c-%c ", mask & 1 ? 'R' : '<', mask & 2 ? 'W' : '>'); } if (tb[SMC_DIAG_CONNINFO] && tb[SMC_DIAG_CONNINFO]->rta_len >= sizeof(struct smc_diag_conninfo)) { struct smc_diag_conninfo cinfo; cinfo = *(struct smc_diag_conninfo *)RTA_DATA(tb[SMC_DIAG_CONNINFO]); printf("%08x ", cinfo.token); printf("%08x ", cinfo.sndbuf_size); printf("%08x ", cinfo.rmbe_size); printf("%08x ", cinfo.peer_rmbe_size); printf("%04x:%08x ", cinfo.rx_prod.wrap, cinfo.rx_prod.count); printf("%04x:%08x ", cinfo.rx_cons.wrap, cinfo.rx_cons.count); printf("%02x:%02x ", cinfo.rx_prod_flags, cinfo.rx_conn_state_flags); printf("%04x:%08x ", cinfo.tx_prod.wrap, cinfo.tx_prod.count); printf("%04x:%08x ", cinfo.tx_cons.wrap, cinfo.tx_cons.count); printf("%02x:%02x ", cinfo.tx_prod_flags, cinfo.tx_conn_state_flags); printf("%04x:%08x ", cinfo.tx_prep.wrap, cinfo.tx_prep.count); printf("%04x:%08x ", cinfo.tx_sent.wrap, cinfo.tx_sent.count); printf("%04x:%08x ", cinfo.tx_fin.wrap, cinfo.tx_fin.count); } } if (show_smcr) { if (tb[SMC_DIAG_LGRINFO] && tb[SMC_DIAG_LGRINFO]->rta_len >= sizeof(struct smc_diag_lgrinfo)) { struct smc_diag_lgrinfo linfo; linfo = *(struct smc_diag_lgrinfo *)RTA_DATA(tb[SMC_DIAG_LGRINFO]); printf("%4s ", linfo.role ? 
"SERV" : "CLNT"); printf("%-15s ", linfo.lnk[0].ibname); printf("%02x ", linfo.lnk[0].ibport); printf("%02x ", linfo.lnk[0].link_id); printf("%-40s ", linfo.lnk[0].gid); printf("%s", linfo.lnk[0].peer_gid); } } if (show_smcd) { if (tb[SMC_DIAG_DMBINFO] && tb[SMC_DIAG_DMBINFO]->rta_len >= sizeof(struct smcd_diag_dmbinfo)) { struct smcd_diag_dmbinfo dinfo; dinfo = *(struct smcd_diag_dmbinfo *)RTA_DATA(tb[SMC_DIAG_DMBINFO]); printf("%016llx ", dinfo.my_gid); printf("%016llx ", dinfo.token); printf("%016llx ", dinfo.peer_gid); printf("%016llx ", dinfo.peer_token); printf("%08x ", dinfo.linkid); } } newline: printf("\n"); } static int rtnl_dump(struct rtnl_handle *rth) { struct sockaddr_nl nladdr; struct iovec iov; struct msghdr msg = { .msg_name = &nladdr, .msg_namelen = sizeof(nladdr), .msg_iov = &iov, .msg_iovlen = 1, }; char buf[32768]; int msglen; struct nlmsghdr *h = (struct nlmsghdr *)buf; memset(buf, 0, sizeof(buf)); iov.iov_base = buf; iov.iov_len = sizeof(buf); again: msglen = recvmsg(rth->fd, &msg, 0); if (msglen < 0) { if (errno == EINTR || errno == EAGAIN) goto again; fprintf(stderr, "netlink receive error %s (%d)\n", strerror(errno), errno); return EXIT_FAILURE; } if (msglen == 0) { fprintf(stderr, "EOF on netlink\n"); return EXIT_FAILURE; } while(NLMSG_OK(h, msglen)) { if (h->nlmsg_flags & NLM_F_DUMP_INTR) fprintf(stderr, "Dump interrupted\n"); if (h->nlmsg_type == NLMSG_DONE) break; /* process next */ if (h->nlmsg_type == NLMSG_ERROR) { if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) { fprintf(stderr, "ERROR truncated\n"); } else { perror("RTNETLINK answers"); } return EXIT_FAILURE; } show_one_smc_sock(h); h = NLMSG_NEXT(h, msglen); } if (msg.msg_flags & MSG_TRUNC) { fprintf(stderr, "Message truncated\n"); goto again; } return EXIT_SUCCESS; } static int smc_show_netlink() { struct rtnl_handle rth; int rc = 0; if ((rc = rtnl_open(&rth))) return EXIT_FAILURE; rth.dump = MAGIC_SEQ; if ((rc = sockdiag_send(rth.fd))) goto exit; print_header(); rc = 
rtnl_dump(&rth); exit: rtnl_close(&rth); return rc; } static const struct option long_opts[] = { { "all", 0, 0, 'a' }, { "debug", 0, 0, 'd' }, { "listening", 0, 0, 'l' }, { "smcd", 0, 0, 'D' }, { "smcr", 0, 0, 'R' }, { "version", 0, 0, 'V' }, { "wide", 0, 0, 'W' }, { "help", 0, 0, 'h' }, { NULL, 0, NULL, 0} }; static void _usage(FILE *dest) { fprintf(dest, "Usage: %s [ OPTIONS ]\n" "\t-h, --help this message\n" "\t-V, --version show version information\n" "\t-a, --all show all sockets\n" "\t-l, --listening show listening sockets\n" "\t-d, --debug show debug socket information\n" "\t-W, --wide do not truncate IP addresses\n" "\t-D, --smcd show detailed SMC-D information (shows only SMC-D sockets)\n" "\t-R, --smcr show detailed SMC-R information (shows only SMC-R sockets)\n" "\tno OPTIONS show all connected sockets\n", progname); } static void help(void) __attribute__((noreturn)); static void help(void) { _usage(stdout); exit(EXIT_SUCCESS); } static void usage(void) __attribute__((noreturn)); static void usage(void) { _usage(stderr); exit(EXIT_FAILURE); } int main(int argc, char *argv[]) { char *slash; int ch; progname = (slash = strrchr(argv[0], '/')) ? 
slash + 1 : argv[0]; while ((ch = getopt_long(argc, argv, "aldDRhvVW", long_opts, NULL)) != EOF) { switch (ch) { case 'a': all++; break; case 'l': listening++; break; case 'd': show_debug++; break; case 'D': show_smcd++; break; case 'R': show_smcr++; break; case 'v': case 'V': printf("smcss utility, smc-tools-%s (%s)\n", RELEASE_STRING, RELEASE_LEVEL); exit(0); case 'W': show_wide++; break; case 'h': help(); case '?': default: usage(); } } if (show_smcr && show_smcd) { fprintf(stderr, "--smcd together with --smcr is not supported\n"); usage(); } if (listening && show_debug) { fprintf(stderr, "--listening together with --debug is not supported\n"); usage(); } if (listening && all) { fprintf(stderr, "--listening together with --all is not supported\n"); usage(); } if (listening && show_smcr) { fprintf(stderr, "--listening together with --smcr is not supported\n"); usage(); } if (listening && show_smcd) { fprintf(stderr, "--listening together with --smcd is not supported\n"); usage(); } return smc_show_netlink(); } smc-tools-1.2.2/smc_diag.h0000644000175000017500000000532513554320160014403 0ustar rasplraspl/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_SMC_DIAG_H_ #define _UAPI_SMC_DIAG_H_ #include #include #include /* Request structure */ struct smc_diag_req { __u8 diag_family; __u8 pad[2]; __u8 diag_ext; /* Query extended information */ struct inet_diag_sockid id; }; /* Base info structure. 
It contains socket identity (addrs/ports/cookie) based
 * on the internal clcsock, and more SMC-related socket data
 */
struct smc_diag_msg {
	__u8 diag_family;
	__u8 diag_state;	/* numeric socket state; smcss maps it to a name */
	__u8 diag_mode;		/* compared against the SMC_DIAG_MODE_* values */
	__u8 diag_shutdown;
	struct inet_diag_sockid id;
	__u32 diag_uid;
	__u64 diag_inode;
};

/* Mode of a connection */
enum {
	SMC_DIAG_MODE_SMCR,
	SMC_DIAG_MODE_FALLBACK_TCP,
	SMC_DIAG_MODE_SMCD,
};

/* Extensions */
/*
 * smcss uses these values as netlink attribute types and reads the
 * following payloads from them:
 *   SMC_DIAG_CONNINFO - struct smc_diag_conninfo
 *   SMC_DIAG_LGRINFO  - struct smc_diag_lgrinfo
 *   SMC_DIAG_SHUTDOWN - one __u8 mask (bit 0 shown as 'R', bit 1 as 'W')
 *   SMC_DIAG_DMBINFO  - struct smcd_diag_dmbinfo
 *   SMC_DIAG_FALLBACK - struct smc_diag_fallback
 */
enum {
	SMC_DIAG_NONE,
	SMC_DIAG_CONNINFO,
	SMC_DIAG_LGRINFO,
	SMC_DIAG_SHUTDOWN,
	SMC_DIAG_DMBINFO,
	SMC_DIAG_FALLBACK,
	__SMC_DIAG_MAX,
};

/* Highest valid extension value; __SMC_DIAG_MAX itself is only the end marker */
#define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)

/* SMC_DIAG_CONNINFO */

/* One cursor; smcss prints it as "wrap:count" */
struct smc_diag_cursor {
	__u16 reserved;
	__u16 wrap;
	__u32 count;
};

struct smc_diag_conninfo {
	__u32 token; /* unique connection id */
	__u32 sndbuf_size; /* size of send buffer */
	__u32 rmbe_size; /* size of RMB element */
	__u32 peer_rmbe_size; /* size of peer RMB element */
	/* local RMB element cursors */
	struct smc_diag_cursor rx_prod; /* received producer cursor */
	struct smc_diag_cursor rx_cons; /* received consumer cursor */
	/* peer RMB element cursors */
	struct smc_diag_cursor tx_prod; /* sent producer cursor */
	struct smc_diag_cursor tx_cons; /* sent consumer cursor */
	__u8 rx_prod_flags; /* received producer flags */
	__u8 rx_conn_state_flags; /* recvd connection flags*/
	__u8 tx_prod_flags; /* sent producer flags */
	__u8 tx_conn_state_flags; /* sent connection flags*/
	/* send buffer cursors */
	struct smc_diag_cursor tx_prep; /* prepared to be sent cursor */
	struct smc_diag_cursor tx_sent; /* sent cursor */
	struct smc_diag_cursor tx_fin; /* confirmed sent cursor */
};

/* SMC_DIAG_LINKINFO */
/*
 * Note: the extensions enum has no SMC_DIAG_LINKINFO value; this struct is
 * embedded in struct smc_diag_lgrinfo.
 */
#define IB_DEVICE_NAME_MAX 64

struct smc_diag_linkinfo {
	__u8 link_id; /* link identifier */
	__u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */
	__u8 ibport; /* RDMA device port number */
	__u8 gid[40]; /* local GID */
	__u8 peer_gid[40]; /* peer GID */
};

struct smc_diag_lgrinfo {
	struct smc_diag_linkinfo lnk[1];
	__u8 role;	/* smcss prints "SERV" when non-zero, "CLNT" when zero */
};

/* smcss prints the peer value only when it is non-zero */
struct smc_diag_fallback {
	__u32 reason;
	__u32 peer_diagnosis;
};

struct smcd_diag_dmbinfo { /* SMC-D Socket internals */
	__u32 linkid; /* Link identifier */
	__u64 peer_gid; /* Peer GID */
	__u64 my_gid; /* My GID */
	__u64 token; /* Token of DMB */
	__u64 peer_token; /* Token of remote DMBE */
};

#endif /* _UAPI_SMC_DIAG_H_ */