prometheus-node-exporter-collectors 0.0~git20241119.a2b43e1
(git commit a2b43e19be1e64c31b626ca827506977cac93488)

==> .circleci/config.yml <==
---
# node-exporter-textfile-collector-scripts has moved to GitHub Actions.
# CircleCI is not disabled repository-wide so that checks on previous pull requests continue working.
# This file only defines a no-op workflow.
version: 2.1

jobs:
  noop-job:
    docker:
      - image: cimg/base:stable
    steps:
      - run: echo "No-op job; config migrated to GitHub Actions."

workflows:
  noop-workflow:
    jobs:
      - noop-job

==> .flake8 <==
[flake8]
import-order-style = google
max-line-length = 100
exclude =
    .git,
    .circleci,
    .github,

==> .github/workflows/lint.yml <==
---
name: Lint

on:
  - push
  - pull_request

jobs:
  flake8:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - uses: py-actions/flake8@v2

  shellcheck:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ludeeus/action-shellcheck@master

==> CODE_OF_CONDUCT.md <==
# Prometheus Community Code of Conduct

Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md).

==> LICENSE <==
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

==> MAINTAINERS.md <==
* Ben Kochie @SuperQ
* Daniel Swarbrick @dswarbrick

==> README.md <==
# Textfile Collector Example Scripts

These scripts are examples to be used with the Node Exporter textfile collector.

To use these scripts, we recommend using `sponge` to atomically write the output.

```
<collector_script> | sponge <output file>
```

Sponge comes from [moreutils](https://joeyh.name/code/moreutils/)

* [brew install moreutils](http://brewformulas.org/Moreutil)
* [apt install moreutils](https://packages.debian.org/search?keywords=moreutils)
* [pkg install moreutils](https://www.freshports.org/sysutils/moreutils/)

*Caveat*: sponge cannot write atomically if the path specified by the `TMPDIR` environment variable is not on the same filesystem as the target output file.
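For example, the cron entry documented in `directory-size.sh` applies this pattern end to end; the paths below are illustrative and should match your node_exporter `--collector.textfile.directory`:

```
# Run every five minutes and publish the result atomically via sponge.
*/5 * * * * prometheus directory-size.sh /var/lib/prometheus | sponge /var/lib/node_exporter/directory_size.prom
```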
For more information see: https://github.com/prometheus/node_exporter#textfile-collector

==> SECURITY.md <==
# Reporting a security issue

The Prometheus security policy, including how to report vulnerabilities, can be found here: <https://prometheus.io/docs/operating/security/>

==> apt_info.py <==
#!/usr/bin/env python3

"""
Description: Expose metrics from apt. This is inspired by and
intended to be a replacement for the original apt.sh.

This script deliberately does *not* update the apt cache. You need
something else to run `apt update` regularly for the metrics to be
up to date. This can be done in numerous ways, but the canonical way
is to use the normal `APT::Periodic::Update-Package-Lists` setting.

This, for example, will enable a nightly job that runs `apt update`:

    echo 'APT::Periodic::Update-Package-Lists "1";' > /etc/apt/apt.conf.d/99_auto_apt_update.conf

See /usr/lib/apt/apt.systemd.daily for details.

Dependencies: python3-apt, python3-prometheus-client

Authors: Kyle Fazzari
         Daniel Swarbrick
"""

import apt
import apt_pkg
import collections
import os

from prometheus_client import CollectorRegistry, Gauge, generate_latest

_UpgradeInfo = collections.namedtuple("_UpgradeInfo", ["labels", "count"])


def _convert_candidates_to_upgrade_infos(candidates):
    changes_dict = collections.defaultdict(lambda: collections.defaultdict(int))

    for candidate in candidates:
        origins = sorted(
            {f"{o.origin}:{o.codename}/{o.archive}" for o in candidate.origins}
        )
        changes_dict[",".join(origins)][candidate.architecture] += 1

    changes_list = list()
    for origin in sorted(changes_dict.keys()):
        for arch in sorted(changes_dict[origin].keys()):
            changes_list.append(
                _UpgradeInfo(
                    labels=dict(origin=origin, arch=arch),
                    count=changes_dict[origin][arch],
                )
            )

    return changes_list


def _write_pending_upgrades(registry, cache):
    candidates = {
        p.candidate for p in cache
        if p.is_upgradable
    }
    upgrade_list = _convert_candidates_to_upgrade_infos(candidates)

    if upgrade_list:
        g = Gauge('apt_upgrades_pending', "Apt packages pending updates by origin",
                  ['origin', 'arch'], registry=registry)
        for change in upgrade_list:
            g.labels(change.labels['origin'], change.labels['arch']).set(change.count)


def _write_held_upgrades(registry, cache):
    held_candidates = {
        p.candidate for p in cache
        if p.is_upgradable and p._pkg.selected_state == apt_pkg.SELSTATE_HOLD
    }
    upgrade_list = _convert_candidates_to_upgrade_infos(held_candidates)

    if upgrade_list:
        g = Gauge('apt_upgrades_held', "Apt packages pending updates but held back.",
                  ['origin', 'arch'], registry=registry)
        for change in upgrade_list:
            g.labels(change.labels['origin'], change.labels['arch']).set(change.count)


def _write_autoremove_pending(registry, cache):
    autoremovable_packages = {p for p in cache if p.is_auto_removable}

    g = Gauge('apt_autoremove_pending', "Apt packages pending autoremoval.",
              registry=registry)
    g.set(len(autoremovable_packages))


def _write_cache_timestamps(registry):
    g = Gauge('apt_package_cache_timestamp_seconds', "Apt update last run time.",
              registry=registry)
    apt_pkg.init_config()
    if (
        apt_pkg.config.find_b("APT::Periodic::Update-Package-Lists")
        and os.path.isfile("/var/lib/apt/periodic/update-success-stamp")
    ):
        # if we run updates automatically with APT::Periodic, we can
        # check this timestamp file if it exists
        stamp_file = "/var/lib/apt/periodic/update-success-stamp"
    else:
        # if not, let's just fall back on the partial file of the lists directory
        stamp_file = '/var/lib/apt/lists/partial'

    try:
        g.set(os.stat(stamp_file).st_mtime)
    except OSError:
        pass


def _write_reboot_required(registry):
    g = Gauge('node_reboot_required', "Node reboot is required for software updates.",
              registry=registry)
    g.set(int(os.path.isfile('/run/reboot-required')))


def _main():
    cache = apt.cache.Cache()

    registry = CollectorRegistry()

    _write_pending_upgrades(registry, cache)
    _write_held_upgrades(registry, cache)
    _write_autoremove_pending(registry, cache)
    _write_cache_timestamps(registry)
    _write_reboot_required(registry)

    print(generate_latest(registry).decode(), end='')


if __name__ == "__main__":
    _main()
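As the docstring explains, `apt update` runs elsewhere; this script only reads the cache, so it can simply be scheduled alongside the sponge pattern from the README. A minimal sketch (the install path and textfile directory are assumptions, not part of the script):

```
# Illustrative /etc/cron.d entry: publish apt metrics every 15 minutes.
*/15 * * * * root /usr/local/bin/apt_info.py | sponge /var/lib/node_exporter/textfile_collector/apt.prom
```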
"/var/lib/apt/periodic/update-success-stamp" else: # if not, let's just fallback on the partial file of the lists directory stamp_file = '/var/lib/apt/lists/partial' try: g.set(os.stat(stamp_file).st_mtime) except OSError: pass def _write_reboot_required(registry): g = Gauge('node_reboot_required', "Node reboot is required for software updates.", registry=registry) g.set(int(os.path.isfile('/run/reboot-required'))) def _main(): cache = apt.cache.Cache() registry = CollectorRegistry() _write_pending_upgrades(registry, cache) _write_held_upgrades(registry, cache) _write_autoremove_pending(registry, cache) _write_cache_timestamps(registry) _write_reboot_required(registry) print(generate_latest(registry).decode(), end='') if __name__ == "__main__": _main() prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/btrfs_stats.py000077500000000000000000000065301471704556300262250ustar00rootroot00000000000000#!/usr/bin/env python3 # Collect per-device btrfs filesystem errors. Designed to work on Debian and Centos 6 and later. # Requires btrfs-progs package to be installed. # # Consider using node_exporter's built-in btrfs collector instead of this script. import glob import os.path import re import subprocess from prometheus_client import CollectorRegistry, Gauge, generate_latest DEVICE_PATTERN = re.compile(r"^\[([^\]]+)\]\.(\S+)\s+(\d+)$") def get_btrfs_mount_points(): """List all btrfs mount points. Yields: (string) filesystem mount points. """ with open("/proc/mounts") as f: for line in f: parts = line.split() if parts[2] == "btrfs": yield parts[1] def get_btrfs_errors(mountpoint): """Get per-device errors for a btrfs mount point. Args: mountpoint: (string) path to a mount point. Yields: (device, error_type, error_count) tuples, where: device: (string) path to block device. error_type: (string) type of btrfs error. error_count: (int) number of btrfs errors of a given type. 
""" p = subprocess.Popen(["btrfs", "device", "stats", mountpoint], stdout=subprocess.PIPE) stdout, stderr = p.communicate() if p.returncode != 0: raise RuntimeError("btrfs returned exit code %d" % p.returncode) for line in stdout.splitlines(): if not line: continue # Sample line: # [/dev/vdb1].flush_io_errs 0 m = DEVICE_PATTERN.match(line.decode("utf-8")) if not m: raise RuntimeError("unexpected output from btrfs: '%s'" % line) yield m.group(1), m.group(2), int(m.group(3)) def btrfs_error_metrics(registry): """Collect btrfs error metrics.""" g = Gauge('errors_total', 'number of btrfs errors', ['mountpoint', 'device', 'type'], namespace='node_btrfs', registry=registry) for mountpoint in get_btrfs_mount_points(): for device, error_type, error_count in get_btrfs_errors(mountpoint): g.labels(mountpoint, device, error_type).set(error_count) def btrfs_allocation_metrics(registry): """Collect btrfs allocation metrics.""" metric_to_filename = { 'size_bytes': 'total_bytes', 'used_bytes': 'bytes_used', 'reserved_bytes': 'bytes_reserved', 'pinned_bytes': 'bytes_pinned', 'disk_size_bytes': 'disk_total', 'disk_used_bytes': 'disk_used', } metrics = {} for m, f in metric_to_filename.items(): metrics[m] = Gauge(m, 'btrfs allocation data ({})'.format(f), ['fs', 'type'], namespace='node_btrfs', subsystem='allocation', registry=registry) for alloc in glob.glob("/sys/fs/btrfs/*/allocation"): fs = os.path.basename(os.path.dirname(alloc)) for type_ in ('data', 'metadata', 'system'): for m, f in metric_to_filename.items(): filename = os.path.join(alloc, type_, f) with open(filename) as f: value = int(f.read().strip()) metrics[m].labels(fs, type_).set(value) if __name__ == "__main__": registry = CollectorRegistry() btrfs_error_metrics(registry) btrfs_allocation_metrics(registry) print(generate_latest(registry).decode(), end='') prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/chrony.py000077500000000000000000000034361471704556300251730ustar00rootroot00000000000000#!/usr/bin/env python3 # # Description: Gather metrics from Chrony NTP. # import subprocess import sys from prometheus_client import CollectorRegistry, Gauge, generate_latest def chronyc(*args, check=True): """Chrony client wrapper Returns: (str) Data piped to stdout by the chrony subprocess. 
""" return subprocess.run( ['chronyc', *args], stdout=subprocess.PIPE, check=check ).stdout.decode('utf-8') def chronyc_tracking(): return chronyc('-c', 'tracking').split(',') def main(): registry = CollectorRegistry() chrony_tracking = chronyc_tracking() if len(chrony_tracking) != 14: print("ERROR: Unable to parse chronyc tracking CSV", file=sys.stderr) sys.exit(1) g = Gauge('chrony_tracking_reference_info', 'The stratum of the current preferred source', ['ref_id', 'ref_host'], registry=registry) g.labels(chrony_tracking[0], chrony_tracking[1]).set(1) g = Gauge('chrony_tracking_stratum', 'The stratum of the current preferred source', registry=registry) g.set(chrony_tracking[2]) g = Gauge('chrony_tracking_system_offset_seconds', 'The current estimated drift of system time from true time', registry=registry) g.set(chrony_tracking[4]) g = Gauge('chrony_tracking_last_offset_seconds', 'The estimated local offset on the last clock update.', registry=registry) g.set(chrony_tracking[5]) g = Gauge('chrony_tracking_root_dispersion_seconds', 'The absolute bound on the computer’s clock accuracy', registry=registry) g.set(chrony_tracking[5]) print(generate_latest(registry).decode("utf-8"), end='') if __name__ == "__main__": main() prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/deleted_libraries.py000077500000000000000000000050011471704556300273210ustar00rootroot00000000000000#!/usr/bin/env python3 """ Script to count the number of deleted libraries that are linked by running processes and expose a summary as Prometheus metrics. The aim is to discover processes that are still using libraries that have since been updated, perhaps due security vulnerabilities. """ import errno import glob import os import sys from prometheus_client import CollectorRegistry, Gauge, generate_latest def main(): processes_linking_deleted_libraries = {} for path in glob.glob('/proc/*/maps'): try: with open(path, 'rb') as file: for line in file: part = line.decode().strip().split() if len(part) == 7: library = part[5] comment = part[6] if '/lib/' in library and '(deleted)' in comment: if path not in processes_linking_deleted_libraries: processes_linking_deleted_libraries[path] = {} if library in processes_linking_deleted_libraries[path]: processes_linking_deleted_libraries[path][library] += 1 else: processes_linking_deleted_libraries[path][library] = 1 except EnvironmentError as e: # Ignore non-existent files, since the files may have changed since # we globbed. 
==> directory-size.sh <==
#!/usr/bin/env sh
#
# Expose directory usage metrics, passed as an argument.
#
# Usage: add this to crontab:
#
# */5 * * * * prometheus directory-size.sh /var/lib/prometheus | sponge /var/lib/node_exporter/directory_size.prom
#
# sed pattern taken from https://www.robustperception.io/monitoring-directory-sizes-with-the-textfile-collector/
#
# Author: Antoine Beaupré

echo "# HELP node_directory_size_bytes Disk space used by some directories"
echo "# TYPE node_directory_size_bytes gauge"
du --block-size=1 --summarize "$@" \
  | awk '{ print "node_directory_size_bytes{directory=\"" $2 "\"} " $1 }'

==> fstab-check.sh <==
#!/usr/bin/env bash

echo "# HELP node_fstab_mount_status List and status of filesystem mountpoints (0 = not mounted, 1 = mounted)"
echo "# TYPE node_fstab_mount_status gauge"

mapfile -t mountpoints < <(awk '$1 !~ /^#/ && $2 ~ /^[/]/ {print $2}' /etc/fstab)

for mount in "${mountpoints[@]}"
do
    if ! findmnt "$mount" &> /dev/null
    then
        echo "node_fstab_mount_status{mountpoint=\"$mount\"} 0"
    else
        echo "node_fstab_mount_status{mountpoint=\"$mount\"} 1"
    fi
done

==> inotify-instances <==
#!/usr/bin/env python3

"""
Expose Linux inotify(7) instance resource consumption.

Operational properties:

  - This script may be invoked as an unprivileged user; in this case, metrics
    will only be exposed for processes owned by that unprivileged user.

  - No metrics will be exposed for processes that do not hold any inotify fds.

Requires Python 3.5 or later.
"""

import collections
import os
import sys

from prometheus_client import CollectorRegistry, Gauge, generate_latest


class Error(Exception):
    pass


class _PIDGoneError(Error):
    pass


_Process = collections.namedtuple(
    "Process", ["pid", "uid", "command", "inotify_instances"])


def _read_bytes(name):
    with open(name, mode='rb') as f:
        return f.read()


def _pids():
    for n in os.listdir("/proc"):
        if not n.isdigit():
            continue
        yield int(n)


def _pid_uid(pid):
    try:
        s = os.stat("/proc/{}".format(pid))
    except FileNotFoundError:
        raise _PIDGoneError()
    return s.st_uid


def _pid_command(pid):
    # Avoid GNU ps(1) for it truncates comm.
    # https://bugs.launchpad.net/ubuntu/+source/procps/+bug/295876/comments/3
    try:
        cmdline = _read_bytes("/proc/{}/cmdline".format(pid))
    except FileNotFoundError:
        raise _PIDGoneError()

    if not len(cmdline):
        return "<zombie>"

    try:
        prog = cmdline[0:cmdline.index(0x00)]
    except ValueError:
        prog = cmdline

    return os.path.basename(prog).decode(encoding="ascii",
                                         errors="surrogateescape")


def _pid_inotify_instances(pid):
    instances = 0
    try:
        for fd in os.listdir("/proc/{}/fd".format(pid)):
            try:
                target = os.readlink("/proc/{}/fd/{}".format(pid, fd))
            except FileNotFoundError:
                continue
            if target == "anon_inode:inotify":
                instances += 1
    except FileNotFoundError:
        raise _PIDGoneError()
    return instances


def _get_processes():
    for p in _pids():
        try:
            yield _Process(p, _pid_uid(p), _pid_command(p),
                           _pid_inotify_instances(p))
        except (PermissionError, _PIDGoneError):
            continue


def _get_processes_nontrivial():
    return (p for p in _get_processes() if p.inotify_instances > 0)


def main(args_unused=None):
    registry = CollectorRegistry()
    g = Gauge('inotify_instances',
              'Total number of inotify instances held open by a process.',
              ['pid', 'uid', 'command'], registry=registry)

    for proc in _get_processes_nontrivial():
        g.labels(proc.pid, proc.uid, proc.command).set(proc.inotify_instances)

    print(generate_latest(registry).decode(), end='')


if __name__ == "__main__":
    sys.exit(main(sys.argv))
==> ipmitool <==
#!/usr/bin/awk -f

#
# Converts output of `ipmitool sensor` to prometheus format.
#
# With GNU awk:
#   ipmitool sensor | ./ipmitool > ipmitool.prom
#
# With BSD awk:
#   ipmitool sensor | awk -f ./ipmitool > ipmitool.prom
#

function export(values, name) {
    if (values["metric_count"] < 1) {
        return
    }
    delete values["metric_count"]

    printf("# HELP %s%s %s sensor reading from ipmitool\n", namespace, name, help[name]);
    printf("# TYPE %s%s gauge\n", namespace, name);
    for (sensor in values) {
        printf("%s%s{sensor=\"%s\"} %f\n", namespace, name, sensor, values[sensor]);
    }
}

# Fields are bar-separated, with space padding.
BEGIN {
    FS = "[ ]*[|][ ]*";
    namespace = "node_ipmi_";

    # Friendly description of the type of sensor for HELP.
    help["temperature_celsius"] = "Temperature";
    help["volts"] = "Voltage";
    help["amps"] = "Current";
    help["power_watts"] = "Power";
    help["speed_rpm"] = "Fan";
    help["percent"] = "Device";
    help["status"] = "Chassis status";

    temperature_celsius["metric_count"] = 0;
    volts["metric_count"] = 0;
    amps["metric_count"] = 0;
    power_watts["metric_count"] = 0;
    speed_rpm["metric_count"] = 0;
    percent["metric_count"] = 0;
    status["metric_count"] = 0;
}

# Not a valid line.
{
    if (NF < 3) {
        next
    }
}

# $2 is value field.
$2 ~ /na/ {
    next
}

# $3 is type field.
$3 ~ /degrees C/ {
    temperature_celsius[$1] = $2;
    temperature_celsius["metric_count"]++;
}

$3 ~ /Volts/ {
    volts[$1] = $2;
    volts["metric_count"]++;
}

$3 ~ /Amps/ {
    amps[$1] = $2;
    amps["metric_count"]++;
}

$3 ~ /Watts/ {
    power_watts[$1] = $2;
    power_watts["metric_count"]++;
}

$3 ~ /RPM/ {
    speed_rpm[$1] = $2;
    speed_rpm["metric_count"]++;
}

$3 ~ /percent/ {
    percent[$1] = $2;
    percent["metric_count"]++;
}

$3 ~ /discrete/ {
    status[$1] = sprintf("%d", substr($2,3,2));
    status["metric_count"]++;
}

END {
    export(temperature_celsius, "temperature_celsius");
    export(volts, "volts");
    export(amps, "amps");
    export(power_watts, "power_watts");
    export(speed_rpm, "speed_rpm");
    export(percent, "percent");
    export(status, "status");
}
==> lvm-prom-collector <==
#!/usr/bin/env bash
#
# Expose various types of information about lvm2
#
# Usage: lvm-prom-collector <options>
#
# Options:
#
# -g for used and free space of logical volume groups
# -p for used and free space of physical volumes.
# -s for the percentage usage of the snapshots
# -t for the percentage usage of the thin pools
#
# * * * * * root lvm-prom-collector -g | sponge /var/lib/prometheus/node-exporter/lvm.prom
#
# This will expose information about the logical volume groups every minute
#
# Author: Badreddin Aboubakr

set -eu

# Ensure predictable numeric / date formats, etc.
export LC_ALL=C

display_usage() {
    echo "This script must be run with super-user privileges."
    echo "Usage: lvm-prom-collector options"
    echo "Options:"
    echo "Expose various types of information about lvm2"
    echo "Use -g for used and free space of logical volume groups."
    echo "Use -p for used and free space of physical volumes."
    echo "Use -s for the percentage usage of snapshots."
    echo "Use -t for the percentage usage of thin pools."
}

if [ "$(id -u)" != "0" ]; then
    1>&2 echo "This script must be run with super-user privileges."
    exit 1
fi

if [ $# -eq 0 ]; then
    display_usage
    exit 1
fi

thin_pools=false
snapshots=false
physical=false
groups=false

while getopts "ahtpsg" opt; do
    case $opt in
        a)
            thin_pools=true
            snapshots=true
            physical=true
            groups=true
            ;;
        p)
            physical=true
            ;;
        s)
            snapshots=true
            ;;
        g)
            groups=true
            ;;
        t)
            thin_pools=true
            ;;
        h)
            display_usage
            exit 0
            ;;
        \?)
            display_usage
            exit 1
            ;;
    esac
done

if [ "$physical" = true ]; then
    echo "# HELP node_physical_volume_size Physical volume size in bytes"
    echo "# TYPE node_physical_volume_size gauge"
    echo "# HELP node_physical_volume_free Physical volume free space in bytes"
    echo "# TYPE node_physical_volume_free gauge"
    pvs_output=$(pvs --noheadings --units b --nosuffix --nameprefixes --unquoted --options pv_name,pv_fmt,pv_free,pv_size,pv_uuid 2>/dev/null)
    echo "$pvs_output" | while IFS= read -r line; do
        # Skip if the line is empty
        [ -z "$line" ] && continue
        # shellcheck disable=SC2086
        declare $line
        echo "node_physical_volume_size{name=\"$LVM2_PV_NAME\", uuid=\"$LVM2_PV_UUID\", format=\"$LVM2_PV_FMT\"} $LVM2_PV_SIZE"
        echo "node_physical_volume_free{name=\"$LVM2_PV_NAME\", uuid=\"$LVM2_PV_UUID\", format=\"$LVM2_PV_FMT\"} $LVM2_PV_FREE"
    done
fi

if [ "$snapshots" = true ]; then
    echo "# HELP node_lvm_snapshots_allocated percentage of allocated data to a snapshot"
    echo "# TYPE node_lvm_snapshots_allocated gauge"
    lvs_output=$(lvs --noheadings --select 'lv_attr=~[^s.*]' --units b --nosuffix --unquoted --nameprefixes --options lv_uuid,vg_name,data_percent 2>/dev/null)
    echo "$lvs_output" | while IFS= read -r line; do
        # Skip if the line is empty
        [ -z "$line" ] && continue
        # shellcheck disable=SC2086
        declare $line
        echo "node_lvm_snapshots_allocated{uuid=\"$LVM2_LV_UUID\", vgroup=\"$LVM2_VG_NAME\"} $LVM2_DATA_PERCENT"
    done
fi

if [ "$thin_pools" = true ]; then
    lvs_output=$(lvs --noheadings --select 'lv_attr=~[^t.*]' --units b --nosuffix --unquoted --nameprefixes --options lv_uuid,vg_name,data_percent,metadata_percent 2>/dev/null)
    echo "# HELP node_lvm_thin_pools_allocated percentage of allocated thin pool data"
    echo "# TYPE node_lvm_thin_pools_allocated gauge"
    echo "$lvs_output" | while IFS= read -r line; do
        # Skip if the line is empty
        [ -z "$line" ] && continue
        # shellcheck disable=SC2086
        declare $line
        echo "node_lvm_thin_pools_allocated{uuid=\"$LVM2_LV_UUID\", vgroup=\"$LVM2_VG_NAME\"} $LVM2_DATA_PERCENT"
    done

    echo "# HELP node_lvm_thin_pools_metadata percentage of allocated thin pool metadata"
    echo "# TYPE node_lvm_thin_pools_metadata gauge"
    echo "$lvs_output" | while IFS= read -r line; do
        # Skip if the line is empty
        [ -z "$line" ] && continue
        # shellcheck disable=SC2086
        declare $line
        echo "node_lvm_thin_pools_metadata{uuid=\"$LVM2_LV_UUID\", vgroup=\"$LVM2_VG_NAME\"} $LVM2_METADATA_PERCENT"
    done
fi

if [ "$groups" = true ]; then
    echo "# HELP node_volume_group_size Volume group size in bytes"
    echo "# TYPE node_volume_group_size gauge"
    echo "# HELP node_volume_group_free Volume group free space in bytes"
    echo "# TYPE node_volume_group_free gauge"
    vgs_output=$(vgs --noheadings --units b --nosuffix --unquoted --nameprefixes --options vg_name,vg_free,vg_size 2>/dev/null)
    echo "$vgs_output" | while IFS= read -r line; do
        # Skip if the line is empty
        [ -z "$line" ] && continue
        # shellcheck disable=SC2086
        declare $line
        echo "node_volume_group_size{name=\"$LVM2_VG_NAME\"} $LVM2_VG_SIZE"
        echo "node_volume_group_free{name=\"$LVM2_VG_NAME\"} $LVM2_VG_FREE"
    done
fi
==> md_info.sh <==
#!/usr/bin/env bash
set -eu

for MD_DEVICE in /dev/md/*; do
    if [ -b "$MD_DEVICE" ]; then
        # Subshell to avoid eval'd variables from leaking between iterations
        (
            # Resolve symlink to discover device, e.g. /dev/md127
            MD_DEVICE_NUM=$(readlink -f "${MD_DEVICE}")

            # Remove /dev/ prefix
            MD_DEVICE_NUM=${MD_DEVICE_NUM#/dev/}
            MD_DEVICE=${MD_DEVICE#/dev/md/}

            # Query sysfs for info about md device
            SYSFS_BASE="/sys/devices/virtual/block/${MD_DEVICE_NUM}/md"
            MD_LAYOUT=$(cat "${SYSFS_BASE}/layout")
            MD_LEVEL=$(cat "${SYSFS_BASE}/level")
            MD_METADATA_VERSION=$(cat "${SYSFS_BASE}/metadata_version")
            MD_NUM_RAID_DISKS=$(cat "${SYSFS_BASE}/raid_disks")

            # Remove 'raid' prefix from RAID level
            MD_LEVEL=${MD_LEVEL#raid}

            # Output disk metrics
            for RAID_DISK in "${SYSFS_BASE}"/rd[0-9]*; do
                DISK=$(readlink -f "${RAID_DISK}/block")
                DISK_DEVICE=$(basename "${DISK}")
                RAID_DISK_DEVICE=$(basename "${RAID_DISK}")
                RAID_DISK_INDEX=${RAID_DISK_DEVICE#rd}
                RAID_DISK_STATE=$(cat "${RAID_DISK}/state")

                DISK_SET=""
                # Determine disk set using logic from mdadm: https://github.com/neilbrown/mdadm/commit/2c096ebe4b
                if [[ ${RAID_DISK_STATE} == "in_sync" && ${MD_LEVEL} == 10 && $((MD_LAYOUT & ~0x1ffff)) ]]; then
                    NEAR_COPIES=$((MD_LAYOUT & 0xff))
                    FAR_COPIES=$(((MD_LAYOUT >> 8) & 0xff))
                    COPIES=$((NEAR_COPIES * FAR_COPIES))

                    if [[ $((MD_NUM_RAID_DISKS % COPIES == 0)) && $((COPIES <= 26)) ]]; then
                        DISK_SET=$((RAID_DISK_INDEX % COPIES))
                    fi
                fi

                echo -n "node_md_disk_info{disk_device=\"${DISK_DEVICE}\", md_device=\"${MD_DEVICE_NUM}\""
                if [[ -n ${DISK_SET} ]]; then
                    SET_LETTERS=({A..Z})
                    echo -n ", md_set=\"${SET_LETTERS[${DISK_SET}]}\""
                fi
                echo "} 1"
            done

            # Output RAID array metrics
            # NOTE: Metadata version is a label rather than a separate metric because the version can be a string
            echo "node_md_info{md_device=\"${MD_DEVICE_NUM}\", md_name=\"${MD_DEVICE}\", raid_level=\"${MD_LEVEL}\", md_metadata_version=\"${MD_METADATA_VERSION}\"} 1"
        )
    fi
done
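Like `md_info_detail.sh` below, this script is meant to run periodically with its output published for the textfile collector. A minimal sketch using the README's sponge pattern (the paths are illustrative assumptions):

```
# Illustrative cron entry; the output path is an assumption.
*/5 * * * * root md_info.sh | sponge /var/lib/node_exporter/textfile_collector/md_info.prom
```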
==> md_info_detail.sh <==
#!/usr/bin/env bash
#
# Note: This script uses "mdadm --detail" to get some of the metrics, so it must be run as root.
# It is designed to be run periodically in a cronjob, and output to /var/lib/node_exporter/textfile_collector/md_info_detail.prom
# $ cat /etc/cron.d/prometheus_md_info_detail
# * * * * * bash /var/lib/node_exporter/md_info_detail.sh > /var/lib/node_exporter/md_info_detail.prom.$$ && mv /var/lib/node_exporter/md_info_detail.prom.$$ /var/lib/node_exporter/md_info_detail.prom

set -eu

for MD_DEVICE in /dev/md/*; do
    if [ -b "$MD_DEVICE" ]; then
        # Subshell to avoid eval'd variables from leaking between iterations
        (
            # Resolve symlink to discover device, e.g. /dev/md127
            MD_DEVICE_NUM=$(readlink -f "${MD_DEVICE}")

            # Remove /dev/ prefix
            MD_DEVICE_NUM=${MD_DEVICE_NUM#/dev/}
            MD_DEVICE=${MD_DEVICE#/dev/md/}

            # Query sysfs for info about md device
            SYSFS_BASE="/sys/devices/virtual/block/${MD_DEVICE_NUM}/md"
            MD_LAYOUT=$(cat "${SYSFS_BASE}/layout")
            MD_LEVEL=$(cat "${SYSFS_BASE}/level")
            MD_METADATA_VERSION=$(cat "${SYSFS_BASE}/metadata_version")
            MD_NUM_RAID_DISKS=$(cat "${SYSFS_BASE}/raid_disks")

            # Remove 'raid' prefix from RAID level
            MD_LEVEL=${MD_LEVEL#raid}

            # Output disk metrics
            for RAID_DISK in "${SYSFS_BASE}"/rd[0-9]*; do
                DISK=$(readlink -f "${RAID_DISK}/block")
                DISK_DEVICE=$(basename "${DISK}")
                RAID_DISK_DEVICE=$(basename "${RAID_DISK}")
                RAID_DISK_INDEX=${RAID_DISK_DEVICE#rd}
                RAID_DISK_STATE=$(cat "${RAID_DISK}/state")

                DISK_SET=""
                # Determine disk set using logic from mdadm: https://github.com/neilbrown/mdadm/commit/2c096ebe4b
                if [[ ${RAID_DISK_STATE} == "in_sync" && ${MD_LEVEL} == 10 && $((MD_LAYOUT & ~0x1ffff)) ]]; then
                    NEAR_COPIES=$((MD_LAYOUT & 0xff))
                    FAR_COPIES=$(((MD_LAYOUT >> 8) & 0xff))
                    COPIES=$((NEAR_COPIES * FAR_COPIES))

                    if [[ $((MD_NUM_RAID_DISKS % COPIES == 0)) && $((COPIES <= 26)) ]]; then
                        DISK_SET=$((RAID_DISK_INDEX % COPIES))
                    fi
                fi

                echo -n "node_md_disk_info{disk_device=\"${DISK_DEVICE}\", md_device=\"${MD_DEVICE_NUM}\""
                if [[ -n ${DISK_SET} ]]; then
                    SET_LETTERS=({A..Z})
                    echo -n ", md_set=\"${SET_LETTERS[${DISK_SET}]}\""
                fi
                echo "} 1"
            done

            # Get output from mdadm --detail (Note: root/sudo required)
            MDADM_DETAIL_OUTPUT=$(mdadm --detail /dev/"${MD_DEVICE_NUM}")

            # Output RAID "Devices", "Size" and "Event" metrics, from the output of "mdadm --detail"
            while IFS= read -r line ; do
                # Filter out these keys that have numeric values that increment up
                if echo "$line" | grep -E -q "Devices :|Array Size :| Used Dev Size :|Events :"; then
                    MDADM_DETAIL_KEY=$(echo "$line" | cut -d ":" -f 1 | tr -cd '[a-zA-Z0-9]._-')
                    MDADM_DETAIL_VALUE=$(echo "$line" | cut -d ":" -f 2 | cut -d " " -f 2 | sed 's:^ ::')
                    echo "node_md_info_${MDADM_DETAIL_KEY}{md_device=\"${MD_DEVICE_NUM}\", md_name=\"${MD_DEVICE}\", raid_level=\"${MD_LEVEL}\", md_num_raid_disks=\"${MD_NUM_RAID_DISKS}\", md_metadata_version=\"${MD_METADATA_VERSION}\"} ${MDADM_DETAIL_VALUE}"
                fi
            done <<< "$MDADM_DETAIL_OUTPUT"
echo -n "node_md_info{md_device=\"${MD_DEVICE_NUM}\", md_name=\"${MD_DEVICE}\", raid_level=\"${MD_LEVEL}\", md_num_raid_disks=\"${MD_NUM_RAID_DISKS}\", md_metadata_version=\"${MD_METADATA_VERSION}\"" while IFS= read -r line ; do # Filter for lines with a ":", to use for Key/Value pairs in labels if echo "$line" | grep -E -q ":" ; then # Exclude lines with these keys, as they're values are numbers that increment up and captured in individual metrics above if echo "$line" | grep -E -qv "^/|Array Size|Used Dev Size|Events|Update Time|Check Status|Rebuild Status" ; then echo -n ", " MDADM_DETAIL_KEY=$(echo "$line" | cut -d ":" -f 1 | tr -cd '[a-zA-Z0-9]._-') MDADM_DETAIL_VALUE=$(echo "$line" | cut -d ":" -f 2- | sed 's:^ ::') echo -n "${MDADM_DETAIL_KEY}=\"${MDADM_DETAIL_VALUE}\"" fi fi done <<< "$MDADM_DETAIL_OUTPUT" echo "} 1" ) fi done prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mellanox_hca_temp000077500000000000000000000033621471704556300267170ustar00rootroot00000000000000#!/usr/bin/env bash # # Script to read Mellanox HCA temperature using the Mellanox mget_temp_ext tool # # Copyright 2018 The Prometheus Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # Author: Jan Phillip Greimann set -eu # check if root if [ "$EUID" -ne 0 ]; then echo "${0##*/}: Please run as root!" >&2 exit 1 fi # check if programs are installed if ! command -v mget_temp_ext >/dev/null 2>&1; then echo "${0##*/}: mget_temp_ext is not installed. Aborting." >&2 exit 1 fi cat <&2 fi done # if device is empty, no device was found if [ -z "${device-}" ]; then echo "${0##*/}: No InfiniBand HCA device found!" >&2 exit 1 fi prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/000077500000000000000000000000001471704556300242375ustar00rootroot00000000000000prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/fixtures/000077500000000000000000000000001471704556300261105ustar00rootroot00000000000000ntpq_-c_rv_0_offset,sys_jitter,rootdisp,rootdelay000066400000000000000000000001061471704556300376410ustar00rootroot00000000000000prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/fixturesrootdelay=27.506, rootdisp=8.13, offset=0.006675, sys_jitter=0.854537 prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/fixtures/ntpq_-np000066400000000000000000000031001471704556300275610ustar00rootroot00000000000000 remote refid st t when poll reach delay offset jitter =============================================================================== 0.ubuntu.pool.n .POOL. 16 p - 256 0 0.0000 0.0000 0.0001 1.ubuntu.pool.n .POOL. 16 p - 256 0 0.0000 0.0000 0.0001 2.ubuntu.pool.n .POOL. 16 p - 256 0 0.0000 0.0000 0.0001 3.ubuntu.pool.n .POOL. 
16 p - 256 0 0.0000 0.0000 0.0001 +185.125.190.58 86.23.195.30 2 u 16 64 377 29.0289 -1.7571 1.3286 #88.198.48.245 185.248.189.10 2 u 55 64 377 26.1575 0.5048 1.4012 -78.47.93.191 205.46.178.169 2 u 59 64 377 26.7723 -0.6333 1.0814 -185.242.112.53 131.188.3.222 2 u 57 64 377 22.7688 0.1649 1.2886 -3.64.117.201 124.216.164.14 2 u 52 64 377 23.9026 0.5834 1.0397 +162.159.200.1 10.67.8.182 3 u 58 64 377 12.6313 -0.5031 0.5763 -195.201.137.97 168.239.11.197 2 u 51 64 377 27.0073 2.0779 2.1155 -5.1.73.2 131.188.3.222 2 u 55 64 377 23.2717 1.5362 0.7830 *185.248.189.10 .GPS. 1 u 49 64 373 26.7541 1.2858 1.8044 #116.202.14.29 124.216.164.14 2 u 58 64 377 25.7470 0.8951 3.7003 #164.68.124.74 130.133.1.10 2 u 51 64 377 30.8641 0.8981 1.3278 -129.70.132.33 129.70.137.82 2 u 52 64 377 33.2658 2.6236 0.8734 +192.53.103.108 .PTB. 1 u 55 64 377 30.9117 1.5350 0.7222 -178.18.255.231 131.188.3.222 2 u 48 64 377 25.6985 1.5781 1.7870 storcli_-cALL-eALL-sALL_show_all.json000066400000000000000000000245121471704556300345530ustar00rootroot00000000000000prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/fixtures{ "Controllers":[ { "Command Status" : { "CLI Version" : "007.1616.0000.0000 Dec 24, 2020", "Operating system" : "Linux 5.10.0-23-amd64", "Controller" : 0, "Status" : "Success", "Description" : "Show Drive Information Succeeded." }, "Response Data" : { "Drive /c0/e252/s0" : [ { "EID:Slt" : "252:0", "DID" : 2, "State" : "Onln", "DG" : 0, "Size" : "893.750 GB", "Intf" : "SATA", "Med" : "SSD", "SED" : "N", "PI" : "N", "SeSz" : "512B", "Model" : "Micron_5200_MTFDDAK960TDC", "Sp" : "U", "Type" : "-" } ], "Drive /c0/e252/s0 - Detailed Information" : { "Drive /c0/e252/s0 State" : { "Shield Counter" : 0, "Media Error Count" : 0, "Other Error Count" : 0, "Drive Temperature" : " 21C (69.80 F)", "Predictive Failure Count" : 0, "S.M.A.R.T alert flagged by drive" : "No" }, "Drive /c0/e252/s0 Device attributes" : { "SN" : "20032656D641", "Manufacturer Id" : "ATA ", "Model Number" : "Micron_5200_MTFDDAK960TDC", "NAND Vendor" : "NA", "WWN" : "500A07512656D641", "Firmware Revision" : " D1MU004", "Raw size" : "894.252 GB [0x6fc81ab0 Sectors]", "Coerced size" : "893.750 GB [0x6fb80000 Sectors]", "Non Coerced size" : "893.752 GB [0x6fb81ab0 Sectors]", "Device Speed" : "6.0Gb/s", "Link Speed" : "6.0Gb/s", "NCQ setting" : "Enabled", "Write Cache" : "N/A", "Logical Sector Size" : "512B", "Physical Sector Size" : "4 KB", "Connector Name" : "Port 0 - 3 x1" }, "Drive /c0/e252/s0 Policies/Settings" : { "Drive position" : "DriveGroup:0, Span:0, Row:0", "Enclosure position" : "1", "Connected Port Number" : "0(path0) ", "Sequence Number" : 2, "Commissioned Spare" : "No", "Emergency Spare" : "No", "Last Predictive Failure Event Sequence Number" : 0, "Successful diagnostics completion on" : "N/A", "FDE Type" : "None", "SED Capable" : "No", "SED Enabled" : "No", "Secured" : "No", "Cryptographic Erase Capable" : "Yes", "Sanitize Support" : "Not supported", "Locked" : "No", "Needs EKM Attention" : "No", "PI Eligible" : "No", "Certified" : "No", "Wide Port Capable" : "No", "Multipath" : "No", "Port Information" : [ { "Port" : 0, "Status" : "Active", "Linkspeed" : "6.0Gb/s", "SAS address" : "0x4433221102000000" } ] }, "Inquiry Data" : "40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 30 32 33 30 36 32 36 35 36 44 31 34 00 00 00 00 00 00 44 20 4d 31 30 55 34 30 69 4d 72 63 6e 6f 35 5f 30 32 5f 30 54 4d 44 46 41 44 39 4b 30 36 44 54 20 43 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 
2f 01 40 00 00 00 00 06 00 ff 3f 10 00 3f 00 10 fc fb 00 10 fd ff ff ff 0f 00 00 07 00 " }, "Drive /c0/e252/s1" : [ { "EID:Slt" : "252:1", "DID" : 3, "State" : "Onln", "DG" : 0, "Size" : "893.750 GB", "Intf" : "SATA", "Med" : "SSD", "SED" : "N", "PI" : "N", "SeSz" : "512B", "Model" : "Micron_5200_MTFDDAK960TDC", "Sp" : "U", "Type" : "-" } ], "Drive /c0/e252/s1 - Detailed Information" : { "Drive /c0/e252/s1 State" : { "Shield Counter" : 0, "Media Error Count" : 0, "Other Error Count" : 0, "Drive Temperature" : " 21C (69.80 F)", "Predictive Failure Count" : 0, "S.M.A.R.T alert flagged by drive" : "No" }, "Drive /c0/e252/s1 Device attributes" : { "SN" : "20032656D63C", "Manufacturer Id" : "ATA ", "Model Number" : "Micron_5200_MTFDDAK960TDC", "NAND Vendor" : "NA", "WWN" : "500A07512656D63C", "Firmware Revision" : " D1MU004", "Raw size" : "894.252 GB [0x6fc81ab0 Sectors]", "Coerced size" : "893.750 GB [0x6fb80000 Sectors]", "Non Coerced size" : "893.752 GB [0x6fb81ab0 Sectors]", "Device Speed" : "6.0Gb/s", "Link Speed" : "6.0Gb/s", "NCQ setting" : "Enabled", "Write Cache" : "N/A", "Logical Sector Size" : "512B", "Physical Sector Size" : "4 KB", "Connector Name" : "Port 0 - 3 x1" }, "Drive /c0/e252/s1 Policies/Settings" : { "Drive position" : "DriveGroup:0, Span:0, Row:1", "Enclosure position" : "0", "Connected Port Number" : "1(path0) ", "Sequence Number" : 2, "Commissioned Spare" : "No", "Emergency Spare" : "No", "Last Predictive Failure Event Sequence Number" : 0, "Successful diagnostics completion on" : "N/A", "FDE Type" : "None", "SED Capable" : "No", "SED Enabled" : "No", "Secured" : "No", "Cryptographic Erase Capable" : "Yes", "Sanitize Support" : "Not supported", "Locked" : "No", "Needs EKM Attention" : "No", "PI Eligible" : "No", "Certified" : "No", "Wide Port Capable" : "No", "Multipath" : "No", "Port Information" : [ { "Port" : 0, "Status" : "Active", "Linkspeed" : "6.0Gb/s", "SAS address" : "0x4433221103000000" } ] }, "Inquiry Data" : "40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 30 32 33 30 36 32 36 35 36 44 43 33 00 00 00 00 00 00 44 20 4d 31 30 55 34 30 69 4d 72 63 6e 6f 35 5f 30 32 5f 30 54 4d 44 46 41 44 39 4b 30 36 44 54 20 43 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 01 40 00 00 00 00 06 00 ff 3f 10 00 3f 00 10 fc fb 00 10 fd ff ff ff 0f 00 00 07 00 " }, "Drive /c0/e252/s4" : [ { "EID:Slt" : "252:4", "DID" : 7, "State" : "Onln", "DG" : 0, "Size" : "893.750 GB", "Intf" : "SATA", "Med" : "SSD", "SED" : "N", "PI" : "N", "SeSz" : "512B", "Model" : "Micron_5200_MTFDDAK960TDC", "Sp" : "U", "Type" : "-" } ], "Drive /c0/e252/s4 - Detailed Information" : { "Drive /c0/e252/s4 State" : { "Shield Counter" : 0, "Media Error Count" : 0, "Other Error Count" : 0, "Drive Temperature" : " 21C (69.80 F)", "Predictive Failure Count" : 0, "S.M.A.R.T alert flagged by drive" : "No" }, "Drive /c0/e252/s4 Device attributes" : { "SN" : "20032656D649", "Manufacturer Id" : "ATA ", "Model Number" : "Micron_5200_MTFDDAK960TDC", "NAND Vendor" : "NA", "WWN" : "500A07512656D649", "Firmware Revision" : " D1MU004", "Raw size" : "894.252 GB [0x6fc81ab0 Sectors]", "Coerced size" : "893.750 GB [0x6fb80000 Sectors]", "Non Coerced size" : "893.752 GB [0x6fb81ab0 Sectors]", "Device Speed" : "6.0Gb/s", "Link Speed" : "6.0Gb/s", "NCQ setting" : "Enabled", "Write Cache" : "N/A", "Logical Sector Size" : "512B", "Physical Sector Size" : "4 KB", "Connector Name" : "Port 4 - 7 x1" }, "Drive /c0/e252/s4 Policies/Settings" : { "Drive position" : "DriveGroup:0, Span:0, 
Row:3", "Enclosure position" : "0", "Connected Port Number" : "2(path0) ", "Sequence Number" : 2, "Commissioned Spare" : "No", "Emergency Spare" : "No", "Last Predictive Failure Event Sequence Number" : 0, "Successful diagnostics completion on" : "N/A", "FDE Type" : "None", "SED Capable" : "No", "SED Enabled" : "No", "Secured" : "No", "Cryptographic Erase Capable" : "Yes", "Sanitize Support" : "Not supported", "Locked" : "No", "Needs EKM Attention" : "No", "PI Eligible" : "No", "Certified" : "No", "Wide Port Capable" : "No", "Multipath" : "No", "Port Information" : [ { "Port" : 0, "Status" : "Active", "Linkspeed" : "6.0Gb/s", "SAS address" : "0x4433221106000000" } ] }, "Inquiry Data" : "40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 30 32 33 30 36 32 36 35 36 44 39 34 00 00 00 00 00 00 44 20 4d 31 30 55 34 30 69 4d 72 63 6e 6f 35 5f 30 32 5f 30 54 4d 44 46 41 44 39 4b 30 36 44 54 20 43 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 01 40 00 00 00 00 06 00 ff 3f 10 00 3f 00 10 fc fb 00 10 fd ff ff ff 0f 00 00 07 00 " }, "Drive /c0/e252/s5" : [ { "EID:Slt" : "252:5", "DID" : 6, "State" : "Onln", "DG" : 0, "Size" : "893.750 GB", "Intf" : "SATA", "Med" : "SSD", "SED" : "N", "PI" : "N", "SeSz" : "512B", "Model" : "Micron_5200_MTFDDAK960TDC", "Sp" : "U", "Type" : "-" } ], "Drive /c0/e252/s5 - Detailed Information" : { "Drive /c0/e252/s5 State" : { "Shield Counter" : 0, "Media Error Count" : 0, "Other Error Count" : 0, "Drive Temperature" : " 21C (69.80 F)", "Predictive Failure Count" : 0, "S.M.A.R.T alert flagged by drive" : "No" }, "Drive /c0/e252/s5 Device attributes" : { "SN" : "20032656D5D1", "Manufacturer Id" : "ATA ", "Model Number" : "Micron_5200_MTFDDAK960TDC", "NAND Vendor" : "NA", "WWN" : "500A07512656D5D1", "Firmware Revision" : " D1MU004", "Raw size" : "894.252 GB [0x6fc81ab0 Sectors]", "Coerced size" : "893.750 GB [0x6fb80000 Sectors]", "Non Coerced size" : "893.752 GB [0x6fb81ab0 Sectors]", "Device Speed" : "6.0Gb/s", "Link Speed" : "6.0Gb/s", "NCQ setting" : "Enabled", "Write Cache" : "N/A", "Logical Sector Size" : "512B", "Physical Sector Size" : "4 KB", "Connector Name" : "Port 4 - 7 x1" }, "Drive /c0/e252/s5 Policies/Settings" : { "Drive position" : "DriveGroup:0, Span:0, Row:2", "Enclosure position" : "0", "Connected Port Number" : "3(path0) ", "Sequence Number" : 2, "Commissioned Spare" : "No", "Emergency Spare" : "No", "Last Predictive Failure Event Sequence Number" : 0, "Successful diagnostics completion on" : "N/A", "FDE Type" : "None", "SED Capable" : "No", "SED Enabled" : "No", "Secured" : "No", "Cryptographic Erase Capable" : "Yes", "Sanitize Support" : "Not supported", "Locked" : "No", "Needs EKM Attention" : "No", "PI Eligible" : "No", "Certified" : "No", "Wide Port Capable" : "No", "Multipath" : "No", "Port Information" : [ { "Port" : 0, "Status" : "Active", "Linkspeed" : "6.0Gb/s", "SAS address" : "0x4433221107000000" } ] }, "Inquiry Data" : "40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 00 00 00 00 20 20 20 20 20 20 20 20 30 32 33 30 36 32 36 35 35 44 31 44 00 00 00 00 00 00 44 20 4d 31 30 55 34 30 69 4d 72 63 6e 6f 35 5f 30 32 5f 30 54 4d 44 46 41 44 39 4b 30 36 44 54 20 43 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 00 40 00 2f 01 40 00 00 00 00 06 00 ff 3f 10 00 3f 00 10 fc fb 00 10 fd ff ff ff 0f 00 00 07 00 " } } } ] } storcli_-cALL_show_all.json000066400000000000000000000404151471704556300332070ustar00rootroot00000000000000prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/fixtures{ 
"Controllers":[ { "Command Status" : { "CLI Version" : "007.1616.0000.0000 Dec 24, 2020", "Operating system" : "Linux 5.10.0-23-amd64", "Controller" : 0, "Status" : "Success", "Description" : "None" }, "Response Data" : { "Basics" : { "Controller" : 0, "Model" : "PRAID EP420i", "Serial Number" : "0000000061913681", "Current Controller Date/Time" : "05/19/2023, 19:18:24", "Current System Date/time" : "05/19/2023, 21:18:13", "SAS Address" : "500300570256f130", "PCI Address" : "00:02:00:00", "Mfg Date" : "00/00/00", "Rework Date" : "00/00/00", "Revision No" : " " }, "Version" : { "Firmware Package Build" : "24.21.0-0076", "Firmware Version" : "4.680.00-8417", "CPLD Version" : "26515-01A", "Bios Version" : "6.36.00.3_4.19.08.00_0x06180203", "HII Version" : "03.25.05.10", "Ctrl-R Version" : "5.19-0603", "Preboot CLI Version" : "01.07-05:#%0000", "NVDATA Version" : "3.1705.00-0018", "Boot Block Version" : "3.07.00.00-0003", "Driver Name" : "megaraid_sas", "Driver Version" : "07.714.04.00-rc1" }, "Bus" : { "Vendor Id" : 4096, "Device Id" : 93, "SubVendor Id" : 5940, "SubDevice Id" : 4626, "Host Interface" : "PCI-E", "Device Interface" : "SAS-12G", "Bus Number" : 2, "Device Number" : 0, "Function Number" : 0, "Domain ID" : 0 }, "Pending Images in Flash" : { "Image name" : "No pending images" }, "Status" : { "Controller Status" : "Optimal", "Memory Correctable Errors" : 0, "Memory Uncorrectable Errors" : 0, "ECC Bucket Count" : 0, "Any Offline VD Cache Preserved" : "No", "BBU Status" : 0, "PD Firmware Download in progress" : "No", "Support PD Firmware Download" : "Yes", "Lock Key Assigned" : "No", "Failed to get lock key on bootup" : "No", "Lock key has not been backed up" : "No", "Bios was not detected during boot" : "No", "Controller must be rebooted to complete security operation" : "No", "A rollback operation is in progress" : "No", "At least one PFK exists in NVRAM" : "Yes", "SSC Policy is WB" : "No", "Controller has booted into safe mode" : "No", "Controller shutdown required" : "No", "Controller has booted into certificate provision mode" : "No" }, "Supported Adapter Operations" : { "Rebuild Rate" : "Yes", "CC Rate" : "Yes", "BGI Rate " : "Yes", "Reconstruct Rate" : "Yes", "Patrol Read Rate" : "Yes", "Alarm Control" : "No", "Cluster Support" : "No", "BBU" : "Yes", "Spanning" : "Yes", "Dedicated Hot Spare" : "Yes", "Revertible Hot Spares" : "Yes", "Foreign Config Import" : "Yes", "Self Diagnostic" : "Yes", "Allow Mixed Redundancy on Array" : "No", "Global Hot Spares" : "Yes", "Deny SCSI Passthrough" : "No", "Deny SMP Passthrough" : "No", "Deny STP Passthrough" : "No", "Support more than 8 Phys" : "Yes", "FW and Event Time in GMT" : "No", "Support Enhanced Foreign Import" : "Yes", "Support Enclosure Enumeration" : "Yes", "Support Allowed Operations" : "Yes", "Abort CC on Error" : "Yes", "Support Multipath" : "Yes", "Support Odd & Even Drive count in RAID1E" : "No", "Support Security" : "No", "Support Config Page Model" : "Yes", "Support the OCE without adding drives" : "Yes", "Support EKM" : "No", "Snapshot Enabled" : "No", "Support PFK" : "Yes", "Support PI" : "Yes", "Support LDPI Type1" : "No", "Support LDPI Type2" : "No", "Support LDPI Type3" : "No", "Support Ld BBM Info" : "No", "Support Shield State" : "Yes", "Block SSD Write Disk Cache Change" : "No", "Support Suspend Resume BG ops" : "Yes", "Support Emergency Spares" : "No", "Support Set Link Speed" : "Yes", "Support Boot Time PFK Change" : "No", "Support JBOD" : "Yes", "Disable Online PFK Change" : "No", "Support Perf Tuning" : "Yes", 
"Support SSD PatrolRead" : "Yes", "Real Time Scheduler" : "Yes", "Support Reset Now" : "Yes", "Support Emulated Drives" : "Yes", "Headless Mode" : "Yes", "Dedicated HotSpares Limited" : "No", "Point In Time Progress" : "Yes", "Extended LD" : "Yes", "Support Uneven span " : "No", "Support Config Auto Balance" : "No", "Support Maintenance Mode" : "No", "Support Diagnostic results" : "Yes", "Support Ext Enclosure" : "Yes", "Support Sesmonitoring" : "Yes", "Support SecurityonJBOD" : "Yes", "Support ForceFlash" : "Yes", "Support DisableImmediateIO" : "Yes", "Support LargeIOSupport" : "Yes", "Support DrvActivityLEDSetting" : "Yes", "Support FlushWriteVerify" : "Yes", "Support CPLDUpdate" : "Yes", "Support ForceTo512e" : "Yes", "Support discardCacheDuringLDDelete" : "Yes", "Support JBOD Write cache" : "No", "Support Large QD Support" : "No", "Support Ctrl Info Extended" : "No", "Support IButton less" : "No", "Support AES Encryption Algorithm" : "No", "Support Encrypted MFC" : "No", "Support Snapdump" : "No", "Support Force Personality Change" : "No", "Support Dual Fw Image" : "No", "Support PSOC Update" : "No", "Support Secure Boot" : "No", "Support Debug Queue" : "Yes", "Support Least Latency Mode" : "Yes", "Support OnDemand Snapdump" : "No", "Support Clear Snapdump" : "No", "Support PHY current speed" : "No", "Support Lane current speed" : "No", "Support NVMe Width" : "No", "Support Lane DeviceType" : "No", "Support Extended Drive performance Monitoring" : "No", "Support NVMe Repair" : "No", "Support Platform Security" : "No", "Support None Mode Params" : "No", "Support Extended Controller Property" : "No", "Support Smart Poll Interval for DirectAttached" : "No", "Support Write Journal Pinning" : "No", "Support SMP Passthru with Port Number" : "No", "Support NVMe Init Error Device ConnectorIndex" : "No" }, "Supported PD Operations" : { "Force Online" : "Yes", "Force Offline" : "Yes", "Force Rebuild" : "Yes", "Deny Force Failed" : "No", "Deny Force Good/Bad" : "No", "Deny Missing Replace" : "No", "Deny Clear" : "No", "Deny Locate" : "No", "Support Power State" : "Yes", "Set Power State For Cfg" : "No", "Support T10 Power State" : "No", "Support Temperature" : "Yes", "NCQ" : "Yes", "Support Max Rate SATA" : "No", "Support Degraded Media" : "No", "Support Parallel FW Update" : "Yes", "Support Drive Crypto Erase" : "Yes", "Support SSD Wear Gauge" : "No" }, "Supported VD Operations" : { "Read Policy" : "Yes", "Write Policy" : "Yes", "IO Policy" : "Yes", "Access Policy" : "Yes", "Disk Cache Policy" : "Yes", "Reconstruction" : "Yes", "Deny Locate" : "No", "Deny CC" : "No", "Allow Ctrl Encryption" : "No", "Enable LDBBM" : "Yes", "Support FastPath" : "Yes", "Performance Metrics" : "Yes", "Power Savings" : "No", "Support Powersave Max With Cache" : "No", "Support Breakmirror" : "Yes", "Support SSC WriteBack" : "No", "Support SSC Association" : "No", "Support VD Hide" : "Yes", "Support VD Cachebypass" : "Yes", "Support VD discardCacheDuringLDDelete" : "Yes", "Support VD Scsi Unmap" : "No" }, "Advanced Software Option" : [ { "Adv S/W Opt" : "MegaRAID FastPath", " Time Remaining" : " Unlimited", " Mode" : " -" }, { "Adv S/W Opt" : "MegaRAID RAID6", " Time Remaining" : " Unlimited", " Mode" : " -" }, { "Adv S/W Opt" : "MegaRAID RAID5", " Time Remaining" : " Unlimited", " Mode" : " -" } ], "Safe ID" : " JT7NM98TL2LU1UQ3TRWVCU8N97FB79EGK7P4H12Z", "HwCfg" : { "ChipRevision" : " C0", "BatteryFRU" : "N/A", "Front End Port Count" : 0, "Backend Port Count" : 8, "BBU" : "Present", "Alarm" : "Absent", "Serial 
Debugger" : "Present", "NVRAM Size" : "32KB", "Flash Size" : "16MB", "On Board Memory Size" : "2048MB", "CacheVault Flash Size" : "8.000 GB", "TPM" : "Absent", "Upgrade Key" : "Absent", "On Board Expander" : "Absent", "Temperature Sensor for ROC" : "Present", "Temperature Sensor for Controller" : "Absent", "Upgradable CPLD" : "Present", "Upgradable PSOC" : "Absent", "Current Size of CacheCade (GB)" : 0, "Current Size of FW Cache (MB)" : 1715, "ROC temperature(Degree Celsius)" : 52 }, "Policies" : { "Policies Table" : [ { "Policy" : "Predictive Fail Poll Interval", "Current" : "300 sec", "Default" : "" }, { "Policy" : "Interrupt Throttle Active Count", "Current" : "16", "Default" : "" }, { "Policy" : "Interrupt Throttle Completion", "Current" : "50 us", "Default" : "" }, { "Policy" : "Rebuild Rate", "Current" : "30 %", "Default" : "30%" }, { "Policy" : "PR Rate", "Current" : "20 %", "Default" : "30%" }, { "Policy" : "BGI Rate", "Current" : "30 %", "Default" : "30%" }, { "Policy" : "Check Consistency Rate", "Current" : "30 %", "Default" : "30%" }, { "Policy" : "Reconstruction Rate", "Current" : "30 %", "Default" : "30%" }, { "Policy" : "Cache Flush Interval", "Current" : "4s", "Default" : "" } ], "Flush Time(Default)" : "4s", "Drive Coercion Mode" : "none", "Auto Rebuild" : "On", "Battery Warning" : "On", "ECC Bucket Size" : 15, "ECC Bucket Leak Rate (hrs)" : 24, "Restore Hot Spare on Insertion" : "On", "Expose Enclosure Devices" : "Off", "Maintain PD Fail History" : "On", "Reorder Host Requests" : "On", "Auto detect BackPlane" : "SGPIO/i2c SEP", "Load Balance Mode" : "Auto", "Security Key Assigned" : "Off", "Disable Online Controller Reset" : "Off", "Use drive activity for locate" : "Off" }, "Boot" : { "BIOS Enumerate VDs" : 1, "Stop BIOS on Error" : "Off", "Delay during POST" : 0, "Spin Down Mode" : "None", "Enable Ctrl-R" : "Yes", "Enable Web BIOS" : "No", "Enable PreBoot CLI" : "No", "Enable BIOS" : "Yes", "Max Drives to Spinup at One Time" : 2, "Maximum number of direct attached drives to spin up in 1 min" : 20, "Delay Among Spinup Groups (sec)" : 6, "Allow Boot with Preserved Cache" : "Off" }, "High Availability" : { "Topology Type" : "None", "Cluster Permitted" : "No", "Cluster Active" : "No" }, "Defaults" : { "Phy Polarity" : 0, "Phy PolaritySplit" : 0, "Strip Size" : "256 KB", "Write Policy" : "WB", "Read Policy" : "RA", "Cache When BBU Bad" : "Off", "Cached IO" : "Off", "VD PowerSave Policy" : "Controller Defined", "Default spin down time (mins)" : 30, "Coercion Mode" : "None", "ZCR Config" : "Unknown", "Max Chained Enclosures" : 16, "Direct PD Mapping" : "No", "Restore Hot Spare on Insertion" : "Yes", "Expose Enclosure Devices" : "No", "Maintain PD Fail History" : "Yes", "Zero Based Enclosure Enumeration" : "No", "Disable Puncturing" : "No", "EnableLDBBM" : "Yes", "DisableHII" : "No", "Un-Certified Hard Disk Drives" : "Allow", "SMART Mode" : "Mode 6", "Enable LED Header" : "No", "LED Show Drive Activity" : "Yes", "Dirty LED Shows Drive Activity" : "No", "EnableCrashDump" : "Yes", "Disable Online Controller Reset" : "No", "Treat Single span R1E as R10" : "No", "Power Saving option" : "Enabled", "TTY Log In Flash" : "Yes", "Auto Enhanced Import" : "No", "BreakMirror RAID Support" : "single span R1", "Disable Join Mirror" : "Yes", "Enable Shield State" : "Yes", "Time taken to detect CME" : "60 sec" }, "Capabilities" : { "Supported Drives" : "SAS, SATA", "RAID Level Supported" : "RAID0, RAID1(2 or more drives), RAID5, RAID6, RAID00, RAID10(2 or more drives per span), RAID50, RAID60", 
"Enable JBOD" : "No", "Mix in Enclosure" : "Allowed", "Mix of SAS/SATA of HDD type in VD" : "Not Allowed", "Mix of SAS/SATA of SSD type in VD" : "Not Allowed", "Mix of SSD/HDD in VD" : "Not Allowed", "SAS Disable" : "No", "Max Arms Per VD" : 32, "Max Spans Per VD" : 8, "Max Arrays" : 128, "Max VD per array" : 16, "Max Number of VDs" : 64, "Max Parallel Commands" : 928, "Max SGE Count" : 60, "Max Data Transfer Size" : "8192 sectors", "Max Strips PerIO" : 128, "Max Configurable CacheCade Size(GB)" : 0, "Max Transportable DGs" : 0, "Enable Snapdump" : "No", "Enable SCSI Unmap" : "Yes", "FDE Drive Mix Support" : "No", "Min Strip Size" : "64 KB", "Max Strip Size" : "1.000 MB" }, "Scheduled Tasks" : { "Consistency Check Reoccurrence" : "168 hrs", "Next Consistency check launch" : "NA", "Patrol Read Reoccurrence" : "168 hrs", "Next Patrol Read launch" : "05/20/2023, 03:00:00", "Battery learn Reoccurrence" : "670 hrs", "Next Battery Learn" : "06/03/2023, 10:00:00", "OEMID" : "FSC" }, "Security Protocol properties" : { "Security Protocol" : "None" }, "Drive Groups" : 1, "TOPOLOGY" : [ { "DG" : 0, "Arr" : "-", "Row" : "-", "EID:Slot" : "-", "DID" : "-", "Type" : "RAID6", "State" : "Optl", "BT" : "N", "Size" : "1.745 TB", "PDC" : "dflt", "PI" : "N", "SED" : "N", "DS3" : "none", "FSpace" : "N", "TR" : "N" }, { "DG" : 0, "Arr" : 0, "Row" : "-", "EID:Slot" : "-", "DID" : "-", "Type" : "RAID6", "State" : "Optl", "BT" : "N", "Size" : "1.745 TB", "PDC" : "dflt", "PI" : "N", "SED" : "N", "DS3" : "none", "FSpace" : "N", "TR" : "N" }, { "DG" : 0, "Arr" : 0, "Row" : 0, "EID:Slot" : "252:0", "DID" : 2, "Type" : "DRIVE", "State" : "Onln", "BT" : "N", "Size" : "893.750 GB", "PDC" : "dflt", "PI" : "N", "SED" : "N", "DS3" : "none", "FSpace" : "-", "TR" : "N" }, { "DG" : 0, "Arr" : 0, "Row" : 1, "EID:Slot" : "252:1", "DID" : 3, "Type" : "DRIVE", "State" : "Onln", "BT" : "N", "Size" : "893.750 GB", "PDC" : "dflt", "PI" : "N", "SED" : "N", "DS3" : "none", "FSpace" : "-", "TR" : "N" }, { "DG" : 0, "Arr" : 0, "Row" : 2, "EID:Slot" : "252:5", "DID" : 6, "Type" : "DRIVE", "State" : "Onln", "BT" : "N", "Size" : "893.750 GB", "PDC" : "dflt", "PI" : "N", "SED" : "N", "DS3" : "none", "FSpace" : "-", "TR" : "N" }, { "DG" : 0, "Arr" : 0, "Row" : 3, "EID:Slot" : "252:4", "DID" : 7, "Type" : "DRIVE", "State" : "Onln", "BT" : "N", "Size" : "893.750 GB", "PDC" : "dflt", "PI" : "N", "SED" : "N", "DS3" : "none", "FSpace" : "-", "TR" : "N" } ], "Virtual Drives" : 1, "VD LIST" : [ { "DG/VD" : "0/0", "TYPE" : "RAID6", "State" : "Optl", "Access" : "RW", "Consist" : "Yes", "Cache" : "RWBD", "Cac" : "-", "sCC" : "OFF", "Size" : "1.745 TB", "Name" : "" } ], "Physical Drives" : 4, "PD LIST" : [ { "EID:Slt" : "252:0", "DID" : 2, "State" : "Onln", "DG" : 0, "Size" : "893.750 GB", "Intf" : "SATA", "Med" : "SSD", "SED" : "N", "PI" : "N", "SeSz" : "512B", "Model" : "Micron_5200_MTFDDAK960TDC", "Sp" : "U", "Type" : "-" }, { "EID:Slt" : "252:1", "DID" : 3, "State" : "Onln", "DG" : 0, "Size" : "893.750 GB", "Intf" : "SATA", "Med" : "SSD", "SED" : "N", "PI" : "N", "SeSz" : "512B", "Model" : "Micron_5200_MTFDDAK960TDC", "Sp" : "U", "Type" : "-" }, { "EID:Slt" : "252:4", "DID" : 7, "State" : "Onln", "DG" : 0, "Size" : "893.750 GB", "Intf" : "SATA", "Med" : "SSD", "SED" : "N", "PI" : "N", "SeSz" : "512B", "Model" : "Micron_5200_MTFDDAK960TDC", "Sp" : "U", "Type" : "-" }, { "EID:Slt" : "252:5", "DID" : 6, "State" : "Onln", "DG" : 0, "Size" : "893.750 GB", "Intf" : "SATA", "Med" : "SSD", "SED" : "N", "PI" : "N", "SeSz" : "512B", "Model" : 
"Micron_5200_MTFDDAK960TDC", "Sp" : "U", "Type" : "-" } ], "Enclosures" : 1, "Enclosure LIST" : [ { "EID" : 252, "State" : "OK", "Slots" : 8, "PD" : 4, "PS" : 0, "Fans" : 0, "TSs" : 0, "Alms" : 0, "SIM" : 1, "Port#" : "-", "ProdID" : "SGPIO", "VendorSpecific" : " " } ], "Cachevault_Info" : [ { "Model" : "CVPM02", "State" : "Optimal", "Temp" : "18C", "Mode" : "-", "MfgDate" : "2019/07/06" } ] } } ] } zfs_list_-p_-H_-t_snapshot_-o_name,used,creation000066400000000000000000000002361471704556300372020ustar00rootroot00000000000000prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/fixtureszpool/vol/0@snapshot-name 0 1685082363 zpool/vol/0@snapshot-name 128 1685085436 zpool/vol1@snapshot-name 0 1685099827 zpool/vol1@snapshot-name 256 1685100606 prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/ntpq000077500000000000000000000005331471704556300251500ustar00rootroot00000000000000#!/usr/bin/env bash # ntpq mock script for testing ntpd_metrics.py textfile collector fixtures_dir=$(dirname "$0")/fixtures case $@ in "-np") cat "${fixtures_dir}"/ntpq_-np ;; "-c rv 0 offset,sys_jitter,rootdisp,rootdelay") cat "${fixtures_dir}"/ntpq_-c_rv_0_offset,sys_jitter,rootdisp,rootdelay ;; esac prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/storcli000077500000000000000000000005441471704556300256470ustar00rootroot00000000000000#!/usr/bin/env bash # storcli mock script for testing storcli textfile collector fixtures_dir=$(dirname "$0")/fixtures case $@ in "/cALL show all J nolog") cat "${fixtures_dir}"/storcli_-cALL_show_all.json ;; "/cALL/eALL/sALL show all J nolog") cat "${fixtures_dir}"/storcli_-cALL-eALL-sALL_show_all.json ;; esac prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/mock/zfs000077500000000000000000000002671471704556300247740ustar00rootroot00000000000000#!/usr/bin/env bash # zfs mock script for testing zfs snapshot provider fixtures_dir=$(dirname "$0")/fixtures cat "${fixtures_dir}/zfs_list_-p_-H_-t_snapshot_-o_name,used,creation" prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/multipathd_info000077500000000000000000000006511471704556300264240ustar00rootroot00000000000000#!/usr/bin/env sh # # Description: Expose device mapper multipathing metrics from multipathd. # # Author: Saket Sinha echo '# HELP node_dmpath_info State info for dev-mapper path' echo '# TYPE node_dmpath_info gauge' /sbin/multipathd show paths format '%d %t %T' | /usr/bin/awk '{ if ( NR > 1) {print "node_dmpath_info{device=\""$1"\"," "dm_path_state=\""$2"\"," "path_state=\""$3"\"}" " 1"}}' prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/needrestart_info.py000066400000000000000000000120671471704556300272210ustar00rootroot00000000000000#!/usr/bin/env python3 """ Description: Expose metrics from needrestart. This script runs needrestart in batch mode. It will never ask for input and will never restart or upgrade anything. 
Dependencies: python >= 3.5, python3-prometheus-client, needrestart Authors: RomainMou """ import sys import time import subprocess from collections import Counter from enum import Enum from prometheus_client import ( CollectorRegistry, Gauge, generate_latest, )
class KernelStatus(Enum): UNKNOWN = 0 CURRENT = 1 ABI_UPGRADE = 2 VERSION_UPGRADE = 3
class MicroCodeStatus(Enum): UNKNOWN = 0 CURRENT = 1 OBSOLETE = 2
class NeedRestartData: def __init__(self, needrestart_output): # Some default values self.timestamp = int(time.time()) self.version = None self.kernel_status = None self.microcode_status = None self.kernel_current_version = "" self.kernel_expected_version = "" self.microcode_current_version = "" self.microcode_expected_version = "" needrestart_counter = Counter() # Parse the cmd output for line in needrestart_output.splitlines(): key, value = line.split(": ", maxsplit=1) if key == "NEEDRESTART-VER": self.version = value # Kernel information elif key == "NEEDRESTART-KCUR": self.kernel_current_version = value elif key == "NEEDRESTART-KEXP": self.kernel_expected_version = value elif key == "NEEDRESTART-KSTA": self.kernel_status = KernelStatus(int(value)) # Microcode information elif key == "NEEDRESTART-UCCUR": self.microcode_current_version = value elif key == "NEEDRESTART-UCEXP": self.microcode_expected_version = value elif key == "NEEDRESTART-UCSTA": self.microcode_status = MicroCodeStatus(int(value)) # Count the others else: needrestart_counter.update({key}) self.services_count = needrestart_counter["NEEDRESTART-SVC"] self.containers_count = needrestart_counter["NEEDRESTART-CONT"] self.sessions_count = needrestart_counter["NEEDRESTART-SESS"]
def write_timestamp(registry, needrestart_data): g = Gauge( "needrestart_timestamp_seconds", "information about the version and when it was last run", labelnames=["version"], registry=registry, ) g.labels(needrestart_data.version).set(needrestart_data.timestamp)
def write_kernel(registry, needrestart_data): if needrestart_data.kernel_status: e = Gauge( "needrestart_kernel_status_info", "information about the kernel status", labelnames=["current", "expected"], registry=registry, ) e.labels( needrestart_data.kernel_current_version, needrestart_data.kernel_expected_version, ).set(needrestart_data.kernel_status.value)
def write_microcode(registry, needrestart_data): if needrestart_data.microcode_status: e = Gauge( "needrestart_microcode_status_info", "information about the microcode status", labelnames=["current", "expected"], registry=registry, ) e.labels( needrestart_data.microcode_current_version, needrestart_data.microcode_expected_version, ).set(needrestart_data.microcode_status.value)
def write_services(registry, needrestart_data): g = Gauge( "needrestart_services_total", "number of services requiring a restart", registry=registry, ) g.set(needrestart_data.services_count)
def write_containers(registry, needrestart_data): g = Gauge( "needrestart_containers_total", "number of containers requiring a restart", registry=registry, ) g.set(needrestart_data.containers_count)
def write_sessions(registry, needrestart_data): g = Gauge( "needrestart_sessions_total", "number of sessions requiring a restart", registry=registry, ) g.set(needrestart_data.sessions_count)
def main(): registry = CollectorRegistry() try: needrestart_output = subprocess.run( ["needrestart", "-b"], capture_output=True, text=True, check=True ).stdout needrestart_data = NeedRestartData(needrestart_output) except subprocess.CalledProcessError as e: print(f"Error executing
needrestart:\n{e}", file=sys.stderr) sys.exit(1) except Exception as e: print(f"An unexpected error occurred:\n{e}", file=sys.stderr) sys.exit(1) write_timestamp(registry, needrestart_data) write_kernel(registry, needrestart_data) write_microcode(registry, needrestart_data) write_services(registry, needrestart_data) write_containers(registry, needrestart_data) write_sessions(registry, needrestart_data) print(generate_latest(registry).decode(), end="") if __name__ == "__main__": main() prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/node_os_info.sh000077500000000000000000000036411471704556300263120ustar00rootroot00000000000000#!/usr/bin/env sh # # Generate node_os_info and node_os_version metrics on legacy systems # which are not handled by node_exporter's own collector # (e.g. CentOS 6) set -e [ -f /etc/os-release ] && exit 0 [ -f /usr/lib/os-release ] && exit 0 ID="" ID_LIKE="" NAME="" PRETTY_NAME="" VERSION="" VERSION_CODENAME="" VERSION_ID="" VERSION_METRIC="" if [ -f /etc/redhat-release ]; then # CentOS release 6.10 (Final) PRETTY_NAME="$(cat /etc/redhat-release)" if [ -f /etc/centos-release ]; then ID="centos" elif [ -f /etc/oracle-release ]; then ID="ol" fi ID_LIKE="rhel fedora" NAME="$(expr "$PRETTY_NAME" : '\([^ ]*\)')" || true VERSION="$(expr "$PRETTY_NAME" : '.* \([0-9].*\)')" || true VERSION_ID="$(expr "$PRETTY_NAME" : '.* \([0-9][0-9.]*\)')" || true # metric cannot distinguish 6.1 from 6.10, so only keep the integer part VERSION_METRIC="$(expr "$VERSION_ID" : '\([0-9]*\)')" || true elif [ -f /etc/lsb-release ]; then # DISTRIB_ID=Ubuntu # DISTRIB_RELEASE=12.04 # DISTRIB_CODENAME=precise # DISTRIB_DESCRIPTION="Ubuntu 12.04 LTS" # Beware, old versions of CentOS with package "redhat-lsb-core" look like this instead: # LSB_VERSION=base-4.0-amd64:base-4.0-noarch:core-4.0-amd64:core-4.0-noarch # shellcheck disable=SC1091 . /etc/lsb-release ID="$(echo "${DISTRIB_ID}" | tr '[:upper:]' '[:lower:]')" NAME="${DISTRIB_ID}" PRETTY_NAME="${DISTRIB_DESCRIPTION}" VERSION="${DISTRIB_RELEASE} (${DISTRIB_CODENAME})" VERSION_CODENAME="${DISTRIB_CODENAME}" VERSION_ID="${DISTRIB_RELEASE}" # 12.04.1 -> 12.04 VERSION_METRIC="$(expr "$VERSION_ID" : '\([0-9]*\|[0-9]*\.[0-9]*\)')" || true fi [ "$VERSION_METRIC" = "" ] && VERSION_METRIC="0" cat < import re import subprocess import sys from prometheus_client import CollectorRegistry, Gauge, generate_latest # NTP peers status, with no DNS lookups. ntpq_cmd = ['ntpq', '-np', '-W', '255'] ntpq_rv_cmd = ['ntpq', '-c', 'rv 0 offset,sys_jitter,rootdisp,rootdelay'] # Regex to match all of the fields in the output of ntpq -np metrics_fields = [ r'^(?P.)(?P[\w\.:]+)', r'(?P[\w\.:]+)', r'(?P\d+)', r'(?P\w)', r'(?P\d+)', r'(?P\d+)', r'(?P\d+)', r'(?P\d+\.\d+)', r'(?P-?\d+\.\d+)', r'(?P\d+\.\d+)', ] metrics_re = r'\s+'.join(metrics_fields) # Remote types # http://support.ntp.org/bin/view/Support/TroubleshootingNTP remote_types = { 'l': 'local', 'u': 'unicast', 'm': 'multicast', 'b': 'broadcast', '-': 'netaddr', } # Status codes: # http://www.eecis.udel.edu/~mills/ntp/html/decode.html#peer status_types = { ' ': 0, 'x': 1, '.': 2, '-': 3, '+': 4, '#': 5, '*': 6, 'o': 7, } # Run the ntpq command. def get_output(command): try: output = subprocess.check_output(command, stderr=subprocess.DEVNULL) except subprocess.CalledProcessError: return None return output.decode() # Parse raw ntpq lines. 
def parse_line(line): if re.match(r'\s+remote\s+refid', line): return None if re.match(r'=+', line): return None if re.match(r'.+\.(LOCL|POOL)\.', line): return None if re.match(r'^$', line): return None return re.match(metrics_re, line) # Main function def main(argv): ntpq = get_output(ntpq_cmd) namespace = 'ntpd' registry = CollectorRegistry() peer_status = Gauge('peer_status', 'NTPd metric for peer_status', ['remote', 'reference', 'stratum', 'type'], namespace=namespace, registry=registry) delay_ms = Gauge('delay_milliseconds', 'NTPd metric for delay_milliseconds', ['remote', 'reference'], namespace=namespace, registry=registry) offset_ms = Gauge('offset_milliseconds', 'NTPd metric for offset_milliseconds', ['remote', 'reference'], namespace=namespace, registry=registry) jitter_ms = Gauge('jitter_milliseconds', 'NTPd metric for jitter_milliseconds', ['remote', 'reference'], namespace=namespace, registry=registry) for line in ntpq.split('\n'): metric_match = parse_line(line) if metric_match is None: continue remote = metric_match.group('remote') refid = metric_match.group('refid') stratum = metric_match.group('stratum') remote_type = remote_types[metric_match.group('type')] peer_status.labels(remote, refid, stratum, remote_type).set( status_types[metric_match.group('status')] ) delay_ms.labels(remote, refid).set(metric_match.group('delay')) offset_ms.labels(remote, refid).set(metric_match.group('offset')) jitter_ms.labels(remote, refid).set(metric_match.group('jitter')) ntpq_rv = get_output(ntpq_rv_cmd) for metric in ntpq_rv.split(','): metric_name, metric_value = metric.strip().split('=') g = Gauge(metric_name, 'NTPd metric for {}'.format(metric_name), [], namespace=namespace, registry=registry) g.set(metric_value) print(generate_latest(registry).decode(), end='') # Go go go! if __name__ == "__main__": main(sys.argv[1:]) prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/nvme_metrics.py000077500000000000000000000234301471704556300263600ustar00rootroot00000000000000#!/usr/bin/env python3 """ NVMe device metrics textfile collector. Requires nvme-cli package. Formatted with Black: $ black -l 100 nvme_metrics.py """ import json import os import re import sys import subprocess # Disable automatic addition of _created series. Must be set before importing prometheus_client. 
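# Without this, every Counter below would also emit a companion "_created" sample in the generated output.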
os.environ["PROMETHEUS_DISABLE_CREATED_SERIES"] = "true" from prometheus_client import CollectorRegistry, Counter, Gauge, Info, generate_latest # noqa: E402 registry = CollectorRegistry() namespace = "nvme" metrics = { # fmt: off "avail_spare": Gauge( "available_spare_ratio", "Device available spare ratio", ["device"], namespace=namespace, registry=registry, ), "controller_busy_time": Counter( "controller_busy_time_seconds", "Device controller busy time in seconds", ["device"], namespace=namespace, registry=registry, ), "critical_warning": Gauge( "critical_warning", "Device critical warning bitmap field", ["device"], namespace=namespace, registry=registry, ), "data_units_read": Counter( "data_units_read_total", "Number of 512-byte data units read by host, reported in thousands", ["device"], namespace=namespace, registry=registry, ), "data_units_written": Counter( "data_units_written_total", "Number of 512-byte data units written by host, reported in thousands", ["device"], namespace=namespace, registry=registry, ), "device_info": Info( "device", "Device information", ["device", "model", "firmware", "serial"], namespace=namespace, registry=registry, ), "host_read_commands": Counter( "host_read_commands_total", "Device read commands from host", ["device"], namespace=namespace, registry=registry, ), "host_write_commands": Counter( "host_write_commands_total", "Device write commands from host", ["device"], namespace=namespace, registry=registry, ), "media_errors": Counter( "media_errors_total", "Device media errors total", ["device"], namespace=namespace, registry=registry, ), "num_err_log_entries": Counter( "num_err_log_entries_total", "Device error log entry count", ["device"], namespace=namespace, registry=registry, ), # FIXME: The "nvmecli" metric ought to be an Info type, not a Gauge. However, making this change # will result in the metric having a "_info" suffix automatically appended, which is arguably # a breaking change. "nvmecli": Gauge( "nvmecli", "nvme-cli tool information", ["version"], namespace=namespace, registry=registry, ), "percent_used": Gauge( "percentage_used_ratio", "Device percentage used ratio", ["device"], namespace=namespace, registry=registry, ), "physical_size": Gauge( "physical_size_bytes", "Device size in bytes", ["device"], namespace=namespace, registry=registry, ), "power_cycles": Counter( "power_cycles_total", "Device number of power cycles", ["device"], namespace=namespace, registry=registry, ), "power_on_hours": Counter( "power_on_hours_total", "Device power-on hours", ["device"], namespace=namespace, registry=registry, ), "sector_size": Gauge( "sector_size_bytes", "Device sector size in bytes", ["device"], namespace=namespace, registry=registry, ), "spare_thresh": Gauge( "available_spare_threshold_ratio", "Device available spare threshold ratio", ["device"], namespace=namespace, registry=registry, ), "temperature": Gauge( "temperature_celsius", "Device temperature in degrees Celsius", ["device"], namespace=namespace, registry=registry, ), "unsafe_shutdowns": Counter( "unsafe_shutdowns_total", "Device number of unsafe shutdowns", ["device"], namespace=namespace, registry=registry, ), "used_bytes": Gauge( "used_bytes", "Device used size in bytes", ["device"], namespace=namespace, registry=registry, ), # fmt: on } def exec_nvme(*args): """ Execute nvme CLI tool with specified arguments and return captured stdout result. Set LC_ALL=C in child process environment so that the nvme tool does not perform any locale-specific number or date formatting, etc. 
""" cmd = ["nvme", *args] return subprocess.check_output(cmd, stderr=subprocess.PIPE, env=dict(os.environ, LC_ALL="C")) def exec_nvme_json(*args): """ Execute nvme CLI tool with specified arguments and return parsed JSON output. """ # Note: nvme-cli v2.11 effectively introduced a breaking change by forcing JSON output to always # be verbose. Older versions of nvme-cli optionally produced verbose output if the --verbose # flag was specified. In order to avoid having to handle two different JSON schemas, always # add the --verbose flag. output = exec_nvme(*args, "--output-format", "json", "--verbose") return json.loads(output) def main(): match = re.match(r"^nvme version (\S+)", exec_nvme("version").decode()) if match: cli_version = match.group(1) else: cli_version = "unknown" metrics["nvmecli"].labels(cli_version).set(1) device_list = exec_nvme_json("list") for device in device_list["Devices"]: for subsys in device["Subsystems"]: for ctrl in subsys["Controllers"]: for ns in ctrl["Namespaces"]: device_name = ns["NameSpace"] # FIXME: This metric ought to be refactored into a "controller_info" metric, # since it contains information that is not unique to the namespace. However, # previous versions of this collector erroneously referred to namespaces, e.g. # "nvme0n1", as devices, so preserve the former behaviour for now. metrics["device_info"].labels( device_name, ctrl["ModelNumber"], ctrl["Firmware"], ctrl["SerialNumber"].strip(), ) metrics["sector_size"].labels(device_name).set(ns["SectorSize"]) metrics["physical_size"].labels(device_name).set(ns["PhysicalSize"]) metrics["used_bytes"].labels(device_name).set(ns["UsedBytes"]) # FIXME: The smart-log should only need to be fetched once per controller, not # per namespace. However, in order to preserve legacy metric labels, fetch it # per namespace anyway. Most consumer grade SSDs will only have one namespace. smart_log = exec_nvme_json("smart-log", os.path.join("/dev", device_name)) # Various counters in the NVMe specification are 128-bit, which would have to # discard resolution if converted to a JSON number (i.e., float64_t). Instead, # nvme-cli marshals them as strings. As such, they need to be explicitly cast # to int or float when using them in Counter metrics. 
metrics["data_units_read"].labels(device_name).inc( int(smart_log["data_units_read"]) ) metrics["data_units_written"].labels(device_name).inc( int(smart_log["data_units_written"]) ) metrics["host_read_commands"].labels(device_name).inc( int(smart_log["host_read_commands"]) ) metrics["host_write_commands"].labels(device_name).inc( int(smart_log["host_write_commands"]) ) metrics["avail_spare"].labels(device_name).set(smart_log["avail_spare"] / 100) metrics["spare_thresh"].labels(device_name).set(smart_log["spare_thresh"] / 100) metrics["percent_used"].labels(device_name).set(smart_log["percent_used"] / 100) metrics["critical_warning"].labels(device_name).set( smart_log["critical_warning"]["value"] ) metrics["media_errors"].labels(device_name).inc(int(smart_log["media_errors"])) metrics["num_err_log_entries"].labels(device_name).inc( int(smart_log["num_err_log_entries"]) ) metrics["power_cycles"].labels(device_name).inc(int(smart_log["power_cycles"])) metrics["power_on_hours"].labels(device_name).inc( int(smart_log["power_on_hours"]) ) metrics["controller_busy_time"].labels(device_name).inc( int(smart_log["controller_busy_time"]) ) metrics["unsafe_shutdowns"].labels(device_name).inc( int(smart_log["unsafe_shutdowns"]) ) # NVMe reports temperature in kelvins; convert it to degrees Celsius. metrics["temperature"].labels(device_name).set(smart_log["temperature"] - 273) if __name__ == "__main__": if os.geteuid() != 0: print("ERROR: script requires root privileges", file=sys.stderr) sys.exit(1) # Check if nvme-cli is installed try: exec_nvme() except FileNotFoundError: print("ERROR: nvme-cli is not installed. Aborting.", file=sys.stderr) sys.exit(1) try: main() except Exception as e: print("ERROR: {}".format(e), file=sys.stderr) sys.exit(1) print(generate_latest(registry).decode(), end="") prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/pacman.sh000077500000000000000000000015721471704556300251110ustar00rootroot00000000000000#!/usr/bin/env bash # # Description: Expose metrics from pacman updates # If installed The bash script *checkupdates*, included with the # *pacman-contrib* package, is used to calculate the number of pending updates. # Otherwise *pacman* is used for calculation. # # Author: Sven Haardiek set -o errexit set -o nounset set -o pipefail if [ -x /usr/bin/checkupdates ] then updates=$(/usr/bin/checkupdates | wc -l) cache=0 else if ! 
updates=$(/usr/bin/pacman -Qu | wc -l) then updates=0 fi cache=1 fi echo "# HELP pacman_updates_pending number of pending updates from pacman" echo "# TYPE pacman_updates_pending gauge" echo "pacman_updates_pending $updates" echo "# HELP pacman_updates_pending_from_cache whether the pending updates information is from cache" echo "# TYPE pacman_updates_pending_from_cache gauge" echo "pacman_updates_pending_from_cache $cache"
prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/smartmon.py000077500000000000000000000317751471704556300255340ustar00rootroot00000000000000#!/usr/bin/env python3 import argparse import collections import csv import re import shlex import subprocess import sys from prometheus_client import CollectorRegistry, Gauge, generate_latest device_info_re = re.compile(r'^(?P<key>[^:]+?)(?:(?:\sis|):)\s*(?P<value>.*)$') ata_error_count_re = re.compile( r'^Error (\d+) \[\d+\] occurred', re.MULTILINE) self_test_re = re.compile(r'^SMART.*(PASSED|OK)$', re.MULTILINE) device_info_map = { 'Vendor': 'vendor', 'Product': 'product', 'Revision': 'revision', 'Logical Unit id': 'lun_id', 'Model Family': 'model_family', 'Device Model': 'device_model', 'Serial Number': 'serial_number', 'Serial number': 'serial_number', 'Firmware Version': 'firmware_version', } smart_attributes_whitelist = ( 'airflow_temperature_cel', 'command_timeout', 'current_pending_sector', 'end_to_end_error', 'erase_fail_count_total', 'g_sense_error_rate', 'hardware_ecc_recovered', 'host_reads_mib', 'host_reads_32mib', 'host_writes_mib', 'host_writes_32mib', 'load_cycle_count', 'lifetime_writes_gib', 'media_wearout_indicator', 'percent_lifetime_remain', 'wear_leveling_count', 'nand_writes_1gib', 'offline_uncorrectable', 'power_cycle_count', 'power_on_hours', 'program_fail_count', 'raw_read_error_rate', 'reallocated_event_count', 'reallocated_sector_ct', 'reported_uncorrect', 'sata_downshift_count', 'seek_error_rate', 'spin_retry_count', 'spin_up_time', 'start_stop_count', 'temperature_case', 'temperature_celsius', 'temperature_internal', 'total_bad_block', 'total_lbas_read', 'total_lbas_written', 'total_writes_gib', 'total_reads_gib', 'udma_crc_error_count', 'unsafe_shutdown_count', 'unexpect_power_loss_ct', 'workld_host_reads_perc', 'workld_media_wear_indic', 'workload_minutes', ) registry = CollectorRegistry() namespace = "smartmon" metrics = { "smartctl_version": Gauge( "smartctl_version", "SMART metric smartctl_version", ["version"], namespace=namespace, registry=registry, ), "smartctl_run": Gauge( "smartctl_run", "SMART metric smartctl_run", ["device", "disk"], namespace=namespace, registry=registry, ), "device_active": Gauge( "device_active", "SMART metric device_active", ["device", "disk"], namespace=namespace, registry=registry, ), "device_info": Gauge( "device_info", "SMART metric device_info", [ "device", "disk", "vendor", "product", "revision", "lun_id", "model_family", "device_model", "serial_number", "firmware_version", ], namespace=namespace, registry=registry, ), "device_smart_available": Gauge( "device_smart_available", "SMART metric device_smart_available", ["device", "disk"], namespace=namespace, registry=registry, ), "device_smart_enabled": Gauge( "device_smart_enabled", "SMART metric device_smart_enabled", ["device", "disk"], namespace=namespace, registry=registry, ), "device_smart_healthy": Gauge( "device_smart_healthy", "SMART metric device_smart_healthy", ["device", "disk"], namespace=namespace, registry=registry, ), # SMART attributes - ATA disks only "attr_value": Gauge( "attr_value", "SMART
metric attr_value", ["device", "disk", "name"], namespace=namespace, registry=registry, ), "attr_worst": Gauge( "attr_worst", "SMART metric attr_worst", ["device", "disk", "name"], namespace=namespace, registry=registry, ), "attr_threshold": Gauge( "attr_threshold", "SMART metric attr_threshold", ["device", "disk", "name"], namespace=namespace, registry=registry, ), "attr_raw_value": Gauge( "attr_raw_value", "SMART metric attr_raw_value", ["device", "disk", "name"], namespace=namespace, registry=registry, ), "device_errors": Gauge( "device_errors", "SMART metric device_errors", ["device", "disk"], namespace=namespace, registry=registry, ), } SmartAttribute = collections.namedtuple('SmartAttribute', [ 'id', 'name', 'flag', 'value', 'worst', 'threshold', 'type', 'updated', 'when_failed', 'raw_value', ]) class Device(collections.namedtuple('DeviceBase', 'path opts')): """Representation of a device as found by smartctl --scan output.""" @property def type(self): return self.opts.type @property def base_labels(self): return {'device': self.path, 'disk': self.type.partition('+')[2] or '0'} def smartctl_select(self): return ['--device', self.type, self.path] def smart_ctl(*args, check=True): """Wrapper around invoking the smartctl binary. Returns: (str) Data piped to stdout by the smartctl subprocess. """ return subprocess.run( ['smartctl', *args], stdout=subprocess.PIPE, check=check ).stdout.decode('utf-8') def smart_ctl_version(): return smart_ctl('-V').split('\n')[0].split()[1] def find_devices(by_id): """Find SMART devices. Yields: (Device) Single device found by smartctl. """ parser = argparse.ArgumentParser() parser.add_argument('-d', '--device', dest='type') args = ['--scan-open'] if by_id: args.extend(['-d', 'by-id']) devices = smart_ctl(*args) for device in devices.split('\n'): device = device.strip() if not device: continue tokens = shlex.split(device, comments=True) if not tokens: continue yield Device(tokens[0], parser.parse_args(tokens[1:])) def device_is_active(device): """Returns whenever the given device is currently active or not. Args: device: (Device) Device in question. Returns: (bool) True if the device is active and False otherwise. """ try: smart_ctl('--nocheck', 'standby', *device.smartctl_select()) except subprocess.CalledProcessError: return False return True def device_info(device): """Query device for basic model information. Args: device: (Device) Device in question. Returns: (generator): Generator yielding: key (str): Key describing the value. value (str): Actual value. """ info_lines = smart_ctl( '--info', *device.smartctl_select() ).strip().split('\n')[3:] matches = (device_info_re.match(line) for line in info_lines) return (m.groups() for m in matches if m is not None) def device_smart_capabilities(device): """Returns SMART capabilities of the given device. Args: device: (Device) Device in question. Returns: (tuple): tuple containing: (bool): True whenever SMART is available, False otherwise. (bool): True whenever SMART is enabled, False otherwise. """ groups = device_info(device) state = { g[1].split(' ', 1)[0] for g in groups if g[0] == 'SMART support'} smart_available = 'Available' in state smart_enabled = 'Enabled' in state return smart_available, smart_enabled def collect_device_info(device): """Collect basic device information. Args: device: (Device) Device in question. 
""" values = dict(device_info(device)) metrics["device_info"].labels( device.base_labels["device"], device.base_labels["disk"], values.get("Vendor", ""), values.get("Product", ""), values.get("Revision", ""), values.get("Logical Unit id", ""), values.get("Model Family", ""), values.get("Device Model", ""), values.get("Serial Number", ""), values.get("Firmware Version", ""), ).set(1) def collect_device_health_self_assessment(device): """Collect metric about the device health self assessment. Args: device: (Device) Device in question. """ out = smart_ctl('--health', *device.smartctl_select(), check=False) self_assessment_passed = bool(self_test_re.search(out)) metrics["device_smart_healthy"].labels( device.base_labels["device"], device.base_labels["disk"] ).set(self_assessment_passed) def collect_ata_metrics(device): # Fetch SMART attributes for the given device. attributes = smart_ctl( '--attributes', *device.smartctl_select() ) # replace multiple occurrences of whitespace with a single whitespace # so that the CSV Parser recognizes individual columns properly. attributes = re.sub(r'[\t\x20]+', ' ', attributes) # Turn smartctl output into a list of lines and skip to the table of # SMART attributes. attribute_lines = attributes.strip().split('\n')[7:] # Some attributes have multiple IDs but have the same name. Don't # yield attributes that already have been reported before. seen = set() reader = csv.DictReader( (line.strip() for line in attribute_lines), fieldnames=SmartAttribute._fields[:-1], restkey=SmartAttribute._fields[-1], delimiter=' ') for entry in reader: # We're only interested in the SMART attributes that are # whitelisted here. entry['name'] = entry['name'].lower() if entry['name'] not in smart_attributes_whitelist: continue # Ensure that only the numeric parts are fetched from the raw_value. # Attributes such as 194 Temperature_Celsius reported by my SSD # are in the format of "36 (Min/Max 24/40)" which can't be expressed # properly as a prometheus metric. m = re.match(r'^(\d+)', ' '.join(entry['raw_value'])) if not m: continue entry['raw_value'] = m.group(1) # Some device models report "---" in the threshold value where most # devices would report "000". We do the substitution here because # downstream code expects values to be convertable to integer. if entry['threshold'] == '---': entry['threshold'] = '0' if entry['name'] in smart_attributes_whitelist and entry['name'] not in seen: for col in 'value', 'worst', 'threshold', 'raw_value': metrics["attr_" + col].labels( device.base_labels["device"], device.base_labels["disk"], entry["name"], ).set(entry[col]) seen.add(entry['name']) def collect_ata_error_count(device): """Inspect the device error log and report the amount of entries. Args: device: (Device) Device in question. """ error_log = smart_ctl( '-l', 'xerror,1', *device.smartctl_select(), check=False) m = ata_error_count_re.search(error_log) error_count = m.group(1) if m is not None else 0 metrics["device_errors"].labels( device.base_labels["device"], device.base_labels["disk"] ).set(error_count) def collect_disks_smart_metrics(wakeup_disks, by_id): for device in find_devices(by_id): is_active = device_is_active(device) metrics["device_active"].labels( device.base_labels["device"], device.base_labels["disk"], ).set(is_active) # Skip further metrics collection to prevent the disk from spinning up. 
if not is_active and not wakeup_disks: continue collect_device_info(device) smart_available, smart_enabled = device_smart_capabilities(device) metrics["device_smart_available"].labels( device.base_labels["device"], device.base_labels["disk"] ).set(smart_available) metrics["device_smart_enabled"].labels( device.base_labels["device"], device.base_labels["disk"] ).set(smart_enabled) # Skip further metrics collection here if SMART is disabled on the device. Further smartctl # invocations would fail anyway. if not smart_available: continue collect_device_health_self_assessment(device) if device.type.startswith('sat'): collect_ata_metrics(device) collect_ata_error_count(device) def main(): parser = argparse.ArgumentParser() parser.add_argument('-s', '--wakeup-disks', dest='wakeup_disks', action='store_true', help="Wake up disks to collect live stats") parser.add_argument('--by-id', dest='by_id', action='store_true', help="Use /dev/disk/by-id/X instead of /dev/sdX to index devices") args = parser.parse_args(sys.argv[1:]) metrics["smartctl_version"].labels(smart_ctl_version()).set(1) collect_disks_smart_metrics(args.wakeup_disks, args.by_id) print(generate_latest(registry).decode(), end="") if __name__ == '__main__': main() prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/smartmon.sh000077500000000000000000000174261471704556300255170ustar00rootroot00000000000000#!/usr/bin/env bash # # Script informed by the collectd monitoring script for smartmontools (using smartctl) # by Samuel B. (c) 2012 # source at: http://devel.dob.sk/collectd-scripts/ # TODO: This probably needs to be a little more complex. The raw numbers can have more # data in them than you'd think. # http://arstechnica.com/civis/viewtopic.php?p=22062211 # Formatting done via shfmt -i 2 # https://github.com/mvdan/sh # Ensure predictable numeric / date formats, etc. 
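# smartctl output below is parsed with awk/sed/grep; a non-C locale could change decimal # separators or date formats and break that parsing.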
export LC_ALL=C parse_smartctl_attributes_awk="$( cat <<'SMARTCTLAWK' $1 ~ /^ *[0-9]+$/ && $2 ~ /^[a-zA-Z0-9_-]+$/ { gsub(/-/, "_"); printf "%s_value{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $4 printf "%s_worst{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $5 printf "%s_threshold{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $6 printf "%s_raw_value{%s,smart_id=\"%s\"} %e\n", $2, labels, $1, $10 } SMARTCTLAWK )" smartmon_attrs="$( cat <<'SMARTMONATTRS' airflow_temperature_cel command_timeout current_pending_sector end_to_end_error erase_fail_count g_sense_error_rate hardware_ecc_recovered host_reads_32mib host_reads_mib host_writes_32mib host_writes_mib load_cycle_count media_wearout_indicator nand_writes_1gib offline_uncorrectable percent_lifetime_remain power_cycle_count power_on_hours program_fail_cnt_total program_fail_count raw_read_error_rate reallocated_event_count reallocated_sector_ct reported_uncorrect runtime_bad_block sata_downshift_count seek_error_rate spin_retry_count spin_up_time start_stop_count temperature_case temperature_celsius temperature_internal total_lbas_read total_lbas_written udma_crc_error_count unsafe_shutdown_count unused_rsvd_blk_cnt_tot wear_leveling_count workld_host_reads_perc workld_media_wear_indic workload_minutes SMARTMONATTRS )" smartmon_attrs="$(echo "${smartmon_attrs}" | xargs | tr ' ' '|')" parse_smartctl_attributes() { local disk="$1" local disk_type="$2" local labels="disk=\"${disk}\",type=\"${disk_type}\"" sed 's/^ \+//g' | awk -v labels="${labels}" "${parse_smartctl_attributes_awk}" 2>/dev/null | tr '[:upper:]' '[:lower:]' | grep -E "(${smartmon_attrs})" } parse_smartctl_scsi_attributes() { local disk="$1" local disk_type="$2" local labels="disk=\"${disk}\",type=\"${disk_type}\"" while read -r line; do attr_type="$(echo "${line}" | tr '=' ':' | cut -f1 -d: | sed 's/^ \+//g' | tr ' ' '_')" attr_value="$(echo "${line}" | tr '=' ':' | cut -f2 -d: | sed 's/^ \+//g')" case "${attr_type}" in number_of_hours_powered_up_) power_on="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;; Current_Drive_Temperature) temp_cel="$(echo "${attr_value}" | cut -f1 -d' ' | awk '{ printf "%e\n", $1 }')" ;; Blocks_sent_to_initiator_) lbas_read="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;; Blocks_received_from_initiator_) lbas_written="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;; Accumulated_start-stop_cycles) power_cycle="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;; Elements_in_grown_defect_list) grown_defects="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;; esac done [ -n "$power_on" ] && echo "power_on_hours_raw_value{${labels},smart_id=\"9\"} ${power_on}" [ -n "$temp_cel" ] && echo "temperature_celsius_raw_value{${labels},smart_id=\"194\"} ${temp_cel}" [ -n "$lbas_read" ] && echo "total_lbas_read_raw_value{${labels},smart_id=\"242\"} ${lbas_read}" [ -n "$lbas_written" ] && echo "total_lbas_written_raw_value{${labels},smart_id=\"241\"} ${lbas_written}" [ -n "$power_cycle" ] && echo "power_cycle_count_raw_value{${labels},smart_id=\"12\"} ${power_cycle}" [ -n "$grown_defects" ] && echo "grown_defects_count_raw_value{${labels},smart_id=\"-1\"} ${grown_defects}" } parse_smartctl_info() { local -i smart_available=0 smart_enabled=0 smart_healthy= local disk="$1" disk_type="$2" local model_family='' device_model='' serial_number='' fw_version='' vendor='' product='' revision='' lun_id='' while read -r line; do info_type="$(echo "${line}" | cut -f1 -d: | tr ' ' '_')" info_value="$(echo "${line}" | cut -f2- -d: | sed 's/^ 
\+//g' | sed 's/"/\\"/')" case "${info_type}" in Model_Family) model_family="${info_value}" ;; Device_Model) device_model="${info_value}" ;; Serial_Number|Serial_number) serial_number="${info_value}" ;; Firmware_Version) fw_version="${info_value}" ;; Vendor) vendor="${info_value}" ;; Product) product="${info_value}" ;; Revision) revision="${info_value}" ;; Logical_Unit_id) lun_id="${info_value}" ;; esac if [[ "${info_type}" == 'SMART_support_is' ]]; then case "${info_value:0:7}" in Enabled) smart_available=1; smart_enabled=1 ;; Availab) smart_available=1; smart_enabled=0 ;; Unavail) smart_available=0; smart_enabled=0 ;; esac fi if [[ "${info_type}" == 'SMART_overall-health_self-assessment_test_result' ]]; then case "${info_value:0:6}" in PASSED) smart_healthy=1 ;; *) smart_healthy=0 ;; esac elif [[ "${info_type}" == 'SMART_Health_Status' ]]; then case "${info_value:0:2}" in OK) smart_healthy=1 ;; *) smart_healthy=0 ;; esac fi done echo "device_info{disk=\"${disk}\",type=\"${disk_type}\",vendor=\"${vendor}\",product=\"${product}\",revision=\"${revision}\",lun_id=\"${lun_id}\",model_family=\"${model_family}\",device_model=\"${device_model}\",serial_number=\"${serial_number}\",firmware_version=\"${fw_version}\"} 1" echo "device_smart_available{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_available}" echo "device_smart_enabled{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_enabled}" [[ "${smart_healthy}" != "" ]] && echo "device_smart_healthy{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_healthy}" } output_format_awk="$( cat <<'OUTPUTAWK' BEGIN { v = "" } v != $1 { print "# HELP smartmon_" $1 " SMART metric " $1; print "# TYPE smartmon_" $1 " gauge"; v = $1 } {print "smartmon_" $0} OUTPUTAWK )" format_output() { sort | awk -F'{' "${output_format_awk}" } smartctl_version="$(/usr/sbin/smartctl -V | awk 'NR==1 && $1 == "smartctl" {print $2}')" echo "smartctl_version{version=\"${smartctl_version}\"} 1" | format_output # Exit if "smartctl" version is lower 6 if [[ ${smartctl_version%.*} -lt 6 ]]; then exit 0 fi device_list="$(/usr/sbin/smartctl --scan-open | awk '/^\/dev/{print $1 "|" $3}')" for device in ${device_list}; do disk="$(echo "${device}" | cut -f1 -d'|')" type="$(echo "${device}" | cut -f2 -d'|')" active=1 echo "smartctl_run{disk=\"${disk}\",type=\"${type}\"}" "$(TZ=UTC date '+%s')" # Check if the device is in a low-power mode /usr/sbin/smartctl -n standby -d "${type}" "${disk}" > /dev/null || active=0 echo "device_active{disk=\"${disk}\",type=\"${type}\"}" "${active}" # Skip further metrics to prevent the disk from spinning up test ${active} -eq 0 && continue # Get the SMART information and health /usr/sbin/smartctl -i -H -d "${type}" "${disk}" | parse_smartctl_info "${disk}" "${type}" # Get the SMART attributes case ${type} in sat) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;; sat+megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;; scsi) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;; megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;; nvme*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;; usbprolific) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;; *) (>&2 echo "disk type is not sat, scsi, nvme or megaraid but ${type}") exit ;; esac done | format_output 
prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/storcli.py000077500000000000000000000364421471704556300253470ustar00rootroot00000000000000#!/usr/bin/env python3 """ Script to parse StorCLI's JSON output and expose MegaRAID health as Prometheus metrics. Tested against StorCLI 'Ver 1.14.12 Nov 25, 2014' and '007.1108.0000.0000 July 17, 2019'. StorCLI reference manual: https://docs.broadcom.com/docs/12352476 Advanced Software Options (ASO) not exposed as metrics currently. JSON key abbreviations used by StorCLI are documented in the standard command output, i.e. when the trailing 'J' is omitted from the command. Formatting done with Black: $ black -l 100 storcli.py """ import argparse import json import os import shlex import subprocess from datetime import datetime from prometheus_client import CollectorRegistry, Gauge, generate_latest __doc__ = "Parse StorCLI's JSON output and expose MegaRAID health as Prometheus metrics." __version__ = "0.1.0" storcli_path = "" namespace = "megaraid" registry = CollectorRegistry() metrics = { # fmt: off "ctrl_info": Gauge( "controller_info", "MegaRAID controller info", ["controller", "model", "serial", "fwversion"], namespace=namespace, registry=registry, ), "ctrl_temperature": Gauge( "temperature", "MegaRAID controller temperature", ["controller"], namespace=namespace, registry=registry, ), "ctrl_healthy": Gauge( "healthy", "MegaRAID controller healthy", ["controller"], namespace=namespace, registry=registry, ), "ctrl_degraded": Gauge( "degraded", "MegaRAID controller degraded", ["controller"], namespace=namespace, registry=registry, ), "ctrl_failed": Gauge( "failed", "MegaRAID controller failed", ["controller"], namespace=namespace, registry=registry, ), "ctrl_time_difference": Gauge( "time_difference", "MegaRAID time difference", ["controller"], namespace=namespace, registry=registry, ), "bbu_healthy": Gauge( "battery_backup_healthy", "MegaRAID battery backup healthy", ["controller"], namespace=namespace, registry=registry, ), "bbu_temperature": Gauge( "bbu_temperature", "MegaRAID battery backup temperature", ["controller", "bbuidx"], namespace=namespace, registry=registry, ), "cv_temperature": Gauge( "cv_temperature", "MegaRAID CacheVault temperature", ["controller", "cvidx"], namespace=namespace, registry=registry, ), "ctrl_sched_patrol_read": Gauge( "scheduled_patrol_read", "MegaRAID scheduled patrol read", ["controller"], namespace=namespace, registry=registry, ), "ctrl_ports": Gauge( "ports", "MegaRAID ports", ["controller"], namespace=namespace, registry=registry, ), "ctrl_physical_drives": Gauge( "physical_drives", "MegaRAID physical drives", ["controller"], namespace=namespace, registry=registry, ), "ctrl_drive_groups": Gauge( "drive_groups", "MegaRAID drive groups", ["controller"], namespace=namespace, registry=registry, ), "ctrl_virtual_drives": Gauge( "virtual_drives", "MegaRAID virtual drives", ["controller"], namespace=namespace, registry=registry, ), "vd_info": Gauge( "vd_info", "MegaRAID virtual drive info", ["controller", "DG", "VG", "name", "cache", "type", "state"], namespace=namespace, registry=registry, ), "pd_shield_counter": Gauge( "pd_shield_counter", "MegaRAID physical drive shield counter", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_media_errors": Gauge( "pd_media_errors", "MegaRAID physical drive media errors", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_other_errors": Gauge( "pd_other_errors", "MegaRAID physical drive other errors",
["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_predictive_errors": Gauge( "pd_predictive_errors", "MegaRAID physical drive predictive errors", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_smart_alerted": Gauge( "pd_smart_alerted", "MegaRAID physical drive SMART alerted", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_link_speed": Gauge( "pd_link_speed_gbps", "MegaRAID physical drive link speed in Gbps", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_device_speed": Gauge( "pd_device_speed_gbps", "MegaRAID physical drive device speed in Gbps", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_commissioned_spare": Gauge( "pd_commissioned_spare", "MegaRAID physical drive commissioned spare", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_emergency_spare": Gauge( "pd_emergency_spare", "MegaRAID physical drive emergency spare", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), "pd_info": Gauge( "pd_info", "MegaRAID physical drive info", [ "controller", "enclosure", "slot", "disk_id", "interface", "media", "model", "DG", "state", "firmware", "serial", ], namespace=namespace, registry=registry, ), "pd_temp": Gauge( "pd_temp_celsius", "MegaRAID physical drive temperature in degrees Celsius", ["controller", "enclosure", "slot"], namespace=namespace, registry=registry, ), # fmt: on } def main(args): """main""" global storcli_path storcli_path = args.storcli_path data = get_storcli_json("/cALL show all J") try: # All the information is collected underneath the Controllers key data = data["Controllers"] for controller in data: response = controller["Response Data"] handle_common_controller(response) if response["Version"]["Driver Name"] == "megaraid_sas": handle_megaraid_controller(response) elif response["Version"]["Driver Name"] == "mpt3sas": handle_sas_controller(response) except KeyError: pass print(generate_latest(registry).decode(), end="") def handle_common_controller(response): controller_index = response["Basics"]["Controller"] metrics["ctrl_info"].labels( controller_index, response["Basics"]["Model"], response["Basics"]["Serial Number"], response["Version"]["Firmware Version"], ).set(1) # Older boards don't have this sensor at all ("Temperature Sensor for ROC" : "Absent") for key in ["ROC temperature(Degree Celcius)", "ROC temperature(Degree Celsius)"]: if key in response["HwCfg"]: metrics["ctrl_temperature"].labels(controller_index).set(response["HwCfg"][key]) break def handle_sas_controller(response): controller_index = response["Basics"]["Controller"] metrics["ctrl_healthy"].labels(controller_index).set( response["Status"]["Controller Status"] == "OK" ) metrics["ctrl_ports"].labels(controller_index).set(response["HwCfg"]["Backend Port Count"]) try: # The number of physical disks is half of the number of items in this dict. Every disk is # listed twice - once for basic info, again for detailed info. 
metrics["ctrl_physical_drives"].labels(controller_index).set( len(response["Physical Device Information"].keys()) / 2 ) except AttributeError: pass for key, basic_disk_info in response["Physical Device Information"].items(): if "Detailed Information" in key: continue create_metrics_of_physical_drive( basic_disk_info[0], response["Physical Device Information"], controller_index ) def handle_megaraid_controller(response): controller_index = response["Basics"]["Controller"] if response["Status"]["BBU Status"] != "NA": # BBU Status Optimal value is 0 for normal, 8 for charging. metrics["bbu_healthy"].labels(controller_index).set( response["Status"]["BBU Status"] in [0, 8, 4096] ) metrics["ctrl_degraded"].labels(controller_index).set( response["Status"]["Controller Status"] == "Degraded" ) metrics["ctrl_failed"].labels(controller_index).set( response["Status"]["Controller Status"] == "Failed" ) metrics["ctrl_healthy"].labels(controller_index).set( response["Status"]["Controller Status"] == "Optimal" ) metrics["ctrl_ports"].labels(controller_index).set(response["HwCfg"]["Backend Port Count"]) metrics["ctrl_sched_patrol_read"].labels(controller_index).set( "hrs" in response["Scheduled Tasks"]["Patrol Read Reoccurrence"] ) for cvidx, cvinfo in enumerate(response.get("Cachevault_Info", [])): if "Temp" in cvinfo: metrics["cv_temperature"].labels(controller_index, cvidx).set( cvinfo["Temp"].replace("C", "") ) for bbuidx, bbuinfo in enumerate(response.get("BBU_Info", [])): if "Temp" in bbuinfo: metrics["bbu_temperature"].labels(controller_index, bbuidx).set( bbuinfo["Temp"].replace("C", "") ) system_time = datetime.strptime( response["Basics"]["Current System Date/time"], "%m/%d/%Y, %H:%M:%S" ) controller_time = datetime.strptime( response["Basics"]["Current Controller Date/Time"], "%m/%d/%Y, %H:%M:%S" ) if system_time and controller_time: metrics["ctrl_time_difference"].labels(controller_index).set( abs(system_time - controller_time).seconds ) # Make sure it doesn't crash if it's a JBOD setup if "Drive Groups" in response: metrics["ctrl_drive_groups"].labels(controller_index).set(response["Drive Groups"]) metrics["ctrl_virtual_drives"].labels(controller_index).set(response["Virtual Drives"]) for virtual_drive in response["VD LIST"]: vd_position = virtual_drive.get("DG/VD") if vd_position: drive_group, volume_group = vd_position.split("/")[:2] else: drive_group, volume_group = -1, -1 metrics["vd_info"].labels( controller_index, drive_group, volume_group, virtual_drive["Name"], virtual_drive["Cache"], virtual_drive["TYPE"], virtual_drive["State"], ).set(1) metrics["ctrl_physical_drives"].labels(controller_index).set(response["Physical Drives"]) if response["Physical Drives"] > 0: data = get_storcli_json("/cALL/eALL/sALL show all J") drive_info = data["Controllers"][controller_index]["Response Data"] for physical_drive in response["PD LIST"]: create_metrics_of_physical_drive(physical_drive, drive_info, controller_index) def create_metrics_of_physical_drive(physical_drive, detailed_info_array, controller_index): enclosure, slot = physical_drive.get("EID:Slt").split(":")[:2] if enclosure == " ": drive_identifier = "Drive /c{0}/s{1}".format(controller_index, slot) enclosure = "" else: drive_identifier = "Drive /c{0}/e{1}/s{2}".format(controller_index, enclosure, slot) try: info = detailed_info_array[drive_identifier + " - Detailed Information"] state = info[drive_identifier + " State"] attributes = info[drive_identifier + " Device attributes"] settings = info[drive_identifier + " Policies/Settings"] if 
state["Shield Counter"] != "N/A": metrics["pd_shield_counter"].labels(controller_index, enclosure, slot).set( state["Shield Counter"] ) if state["Media Error Count"] != "N/A": metrics["pd_media_errors"].labels(controller_index, enclosure, slot).set( state["Media Error Count"] ) if state["Other Error Count"] != "N/A": metrics["pd_other_errors"].labels(controller_index, enclosure, slot).set( state["Other Error Count"] ) if state["Predictive Failure Count"] != "N/A": metrics["pd_predictive_errors"].labels(controller_index, enclosure, slot).set( state["Predictive Failure Count"] ) metrics["pd_smart_alerted"].labels(controller_index, enclosure, slot).set( state["S.M.A.R.T alert flagged by drive"] == "Yes" ) if attributes["Link Speed"] != "Unknown": metrics["pd_link_speed"].labels(controller_index, enclosure, slot).set( attributes["Link Speed"].split(".")[0] ) if attributes["Device Speed"] != "Unknown": metrics["pd_device_speed"].labels(controller_index, enclosure, slot).set( attributes["Device Speed"].split(".")[0] ) metrics["pd_commissioned_spare"].labels(controller_index, enclosure, slot).set( settings["Commissioned Spare"] == "Yes" ) metrics["pd_emergency_spare"].labels(controller_index, enclosure, slot).set( settings["Emergency Spare"] == "Yes" ) # Model, firmware version and serial number may be space-padded, so strip() them. metrics["pd_info"].labels( controller_index, enclosure, slot, physical_drive["DID"], physical_drive["Intf"], physical_drive["Med"], physical_drive["Model"].strip(), physical_drive["DG"], physical_drive["State"], attributes["Firmware Revision"].strip(), attributes["SN"].strip(), ).set(1) if "Drive Temperature" in state and state["Drive Temperature"] != "N/A": metrics["pd_temp"].labels(controller_index, enclosure, slot).set( state["Drive Temperature"].split("C")[0].strip() ) except KeyError: pass def get_storcli_json(storcli_args): """Get storcli output in JSON format.""" # Check if storcli is installed and executable if not (os.path.isfile(storcli_path) and os.access(storcli_path, os.X_OK)): raise SystemExit(1) storcli_cmd = [storcli_path] storcli_cmd.extend(shlex.split(storcli_args)) storcli_cmd.append("nolog") proc = subprocess.Popen( storcli_cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) stdout, _ = proc.communicate() data = json.loads(stdout.decode()) if data["Controllers"][0]["Command Status"]["Status"] != "Success": raise SystemExit(1) return data if __name__ == "__main__": parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument( "--storcli_path", default="/opt/MegaRAID/storcli/storcli64", help="path to StorCLI binary", ) parser.add_argument("--version", action="version", version="%(prog)s {0}".format(__version__)) args = parser.parse_args() main(args) prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/tw_cli.py000077500000000000000000000317001471704556300251450ustar00rootroot00000000000000#!/usr/bin/env python3 # # Prometheus node_exporter textfile collector for 3ware RAID controllers # # Half of it based on "Nagios Plugin for 3ware RAID" from "Hari Sekhon", # Ref: http://github.com/harisekhon/nagios-plugins # ... with additions for full info (-I) gathering # # (c) 2019, Nuno Tavares # # You can find the latest version at: # https://github.com/ntavares/node-exporter-textfile-collector-scripts # """Nagios plugin to test the state of all 3ware RAID arrays and / or drives on all 3ware controllers on the local machine. 
prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/tw_cli.py
#!/usr/bin/env python3
#
# Prometheus node_exporter textfile collector for 3ware RAID controllers
#
# Half of it is based on "Nagios Plugin for 3ware RAID" by Hari Sekhon,
# Ref: http://github.com/harisekhon/nagios-plugins
# ... with additions for full info (-I) gathering
#
# (c) 2019, Nuno Tavares
#
# You can find the latest version at:
# https://github.com/ntavares/node-exporter-textfile-collector-scripts
#

"""Test the state of all 3ware RAID arrays and/or drives on all 3ware controllers
on the local machine, exposing the results as Prometheus metrics.

Requires the tw_cli program written by 3ware, which may be called tw_cli_64
on 64-bit systems."""

import copy
import os
import re
import sys
from argparse import ArgumentParser
from subprocess import Popen, PIPE, STDOUT

__version__ = '0.1.0'

BIN = None
METRICS = {}
METRIC_PREFIX = 'tw_cli'


def exit_error(msg):
    print('{}_cli_error{{message="{}"}}\t1'.format(METRIC_PREFIX, msg))
    sys.exit(1)


def exit_clean():
    global METRICS
    for mk, mv in METRICS.items():
        print('{}_{}\t{}'.format(METRIC_PREFIX, mk, mv))
    sys.exit(0)


def add_metric(metric, labels, value):
    global METRICS
    labelstrs = []
    for lk, lv in labels.items():
        labelstrs += ['{}="{}"'.format(lk, lv)]
    labelstr = ','.join(labelstrs)
    METRICS[metric + '{' + labelstr + '}'] = str(value)


def _set_twcli_binary():
    """Set the path to the tw_cli binary."""
    global BIN
    BIN = '/usr/sbin/tw_cli'


def run(cmd, stripOutput=True):
    """Run a tw_cli command and return its stripped output."""
    if not cmd:
        exit_error("Internal python error - no cmd supplied for 3ware utility")
    try:
        # universal_newlines=True so communicate() accepts and returns str,
        # not bytes, under Python 3.
        process = Popen(BIN, stdin=PIPE, stdout=PIPE, stderr=STDOUT,
                        universal_newlines=True)
    except OSError as error:
        error = str(error)
        if error == "No such file or directory":
            exit_error("Cannot find 3ware utility '{}'".format(BIN))
        else:
            exit_error("Error trying to run 3ware utility - {}".format(error))
    if process.poll():
        exit_error("3ware utility process ended prematurely")
    try:
        stdout, stderr = process.communicate(cmd)
    except OSError as error:
        exit_error("Unable to communicate with 3ware utility - {}".format(error))

    if not stdout:
        exit_error("No output from 3ware utility")
    output = stdout.split('\n')
    # Strip the command prompt, since we're running an interactive CLI shell
    output[0] = re.sub(r'//.*?> ', '', output[0])
    if output[1] == "No controller found.":
        exit_error("No 3ware controllers were found on this machine")
    if process.returncode != 0:
        stderr = stdout.replace('\n', ' ')
        exit_error("3ware utility returned an exit code of {} - {}".format(process.returncode,
                                                                           stderr))
    if stripOutput:
        return output[3:-2]
    return output


def test_all(verbosity, warn_true=False):
    """Call the RAID and drive testing functions."""
    test_arrays(verbosity, warn_true)
    test_drives(verbosity, warn_true)


def test_arrays(verbosity, warn_true=False):
    """Test all the RAID arrays on all the 3ware controllers on the local machine."""
    lines = run('show')
    controllers = [line.split()[0] for line in lines if line.startswith('c')]
    for controller in controllers:
        unit_lines = run('/{} show unitstatus'.format(controller))
        if verbosity >= 3:
            for unit_line in unit_lines:
                print(unit_line)
            print()
        for unit_line in unit_lines:
            unit_line = unit_line.split()
            state = unit_line[2]
            unit = int(unit_line[0][1:])
            raid = unit_line[1]
            add_metric('array_info', {'controller': controller[1:], 'unit': unit,
                                      'state': state, 'raid': raid}, 1)
            if state == 'OK':
                add_metric('array_status', {'controller': controller[1:], 'unit': unit,
                                            'state': state}, 1)
            elif state in ('REBUILDING', 'VERIFY-PAUSED', 'VERIFYING', 'INITIALIZING'):
                if state in ('VERIFY-PAUSED', 'VERIFYING', 'INITIALIZING'):
                    percent_complete = unit_line[4]
                else:
                    percent_complete = unit_line[3]
                if warn_true:
                    add_metric('array_status', {'controller': controller[1:], 'unit': unit,
                                                'state': state, 'pct': percent_complete}, 0)
                else:
                    add_metric('array_status', {'controller': controller[1:], 'unit': unit,
                                                'state': state, 'pct': percent_complete}, 1)
            else:
                add_metric('array_status', {'controller': controller[1:], 'unit': unit,
                                            'state': state}, 0)


def test_drives(verbosity, warn_true=False):
    """Test all the drives on all the 3ware RAID controllers on the local machine."""
    lines = run('show')
    controllers = []
    for line in lines:
        parts = line.split()
        if parts:
            controllers.append(parts[0])
    for controller in controllers:
        drive_lines = run('/{} show drivestatus'.format(controller))
        if verbosity >= 3:
            for drive_line in drive_lines:
                print(drive_line)
            print()
        for drive_line in drive_lines:
            drive_line = drive_line.split()
            state = drive_line[1]
            drive = drive_line[0]
            if drive[0] == 'd':
                drive = drive[1:]
            array = drive_line[2]
            if array[0] == 'u':
                array = array[1:]
            if state in ('OK', 'NOT-PRESENT'):
                add_metric('drive_status', {'controller': controller[1:], 'drive': drive,
                                            'array': array, 'state': state}, 1)
            elif not warn_true and state in ('VERIFYING', 'REBUILDING', 'INITIALIZING'):
                add_metric('drive_status', {'controller': controller[1:], 'drive': drive,
                                            'array': array, 'state': state}, 1)
            else:
                add_metric('drive_status', {'controller': controller[1:], 'drive': drive,
                                            'array': array, 'state': state}, 0)


def _parse_temperature(val):
    result = re.split(r'(\d+)(.*)$', val)
    return result[1]


def _parse_yes_ok_on(val):
    if val in ('OK', 'Yes', 'On'):
        return 1
    return 0


def collect_details(cmdprefix, detailsMap, metric, injectedLabels, verbosity):
    """Generic function to parse 'key = value' lists, based on a detailsMap which
    selects the fields to parse. injectedLabels are baseline labels to be included.

    Note that the map may list both labels to append to a catchall 'metric', and
    individual metrics, whose name overrides 'metric' and which carry only
    injectedLabels."""
    lines = run('{} show all'.format(cmdprefix), False)
    labels = copy.copy(injectedLabels)
    for line in lines:
        if re.match('^' + cmdprefix + ' (.+?)= (.+?)$', line):
            if verbosity >= 3:
                print(line)
            result = re.split(r'\S+ (.+?)= (.+?)$', line)
            k = result[1].strip()
            v = result[2].strip()
            if k in detailsMap:
                if detailsMap[k]['parser']:
                    v = detailsMap[k]['parser'](v)
                # If this field is meant for a separate metric, emit it directly
                if 'metric' in detailsMap[k]:
                    add_metric(detailsMap[k]['metric'], injectedLabels, v)
                else:
                    labels[detailsMap[k]['label']] = v
    add_metric(metric, labels, 1)


def collect_controller(verbosity):
    CTRL_DETAILS = {
        'Model': {'label': 'model', 'parser': None},
        'Firmware Version': {'label': 'firmware', 'parser': None},
        'Bios Version': {'label': 'bios', 'parser': None},
        'Serial Number': {'label': 'serial', 'parser': None},
        'PCB Version': {'label': 'pcb', 'parser': None},
        'PCHIP Version': {'label': 'pchip', 'parser': None},
        'ACHIP Version': {'label': 'achip', 'parser': None},
    }
    lines = run('show')
    controllers = [line.split()[0] for line in lines if line.startswith('c')]
    for controller in controllers:
        collect_details('/' + controller, CTRL_DETAILS, 'controller_info',
                        {'controller': controller[1:]}, verbosity)
        collect_bbu(controller, verbosity)
        collect_drives(controller, verbosity)


def collect_drives(controller, verbosity):
    DRIVE_DETAILS = {
        'Reallocated Sectors': {'metric': 'drive_reallocated_sectors', 'parser': None},
        'Temperature': {'metric': 'drive_temperature', 'parser': _parse_temperature},
        'Model': {'label': 'model', 'parser': None},
        'Firmware Version': {'label': 'firmware', 'parser': None},
        'Serial': {'label': 'serial', 'parser': None},
        'Belongs to Unit': {'label': 'unit', 'parser': None},
        'Link Speed': {'label': 'linkspeed', 'parser': None},
    }
    drive_lines = run('/' + controller + ' show drivestatus')
    for drive_line in drive_lines:
        drive_line = drive_line.split()
        drive = drive_line[0]
        collect_details('/' + controller + '/' + drive, DRIVE_DETAILS, 'drive_info',
                        {'controller': controller[1:], 'drive': drive}, verbosity)


def collect_bbu(controller, verbosity):
    BBU_DETAILS = {
        'Firmware Version': {'label': 'firmware', 'parser': None},
        'Serial Number': {'label': 'serial', 'parser': None},
        'Bootloader Version': {'label': 'bootloader', 'parser': None},
        'PCB Revision': {'label': 'pcb', 'parser': None},
        'Battery Installation Date': {'label': 'since', 'parser': None},
        'Online State': {'metric': 'bbu_online', 'parser': _parse_yes_ok_on},
        'BBU Ready': {'metric': 'bbu_ready', 'parser': _parse_yes_ok_on},
        'BBU Status': {'metric': 'bbu_status', 'parser': _parse_yes_ok_on},
        'Battery Voltage status': {'metric': 'bbu_voltage_status', 'parser': _parse_yes_ok_on},
        'Battery Temperature Status': {'metric': 'bbu_temperature_status',
                                       'parser': _parse_yes_ok_on},
        'Battery Temperature Value': {'metric': 'bbu_temperature',
                                      'parser': _parse_temperature},
    }
    collect_details('/' + controller + '/bbu', BBU_DETAILS, 'bbu_info',
                    {'controller': controller[1:]}, verbosity)


def main():
    """Parse command line options and call the functions to test the arrays/drives."""
    parser = ArgumentParser()
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-a', '--arrays-only', action='store_true',
                       help="Only test the arrays (default: %(default)s)")
    group.add_argument('-d', '--drives-only', action='store_true',
                       help="Only test the drives (default: %(default)s)")
    parser.add_argument('-I', '--info', action='store_true', dest='incl_info',
                        help="Include detailed component info (default: %(default)s)")
    parser.add_argument('-w', '--warn-rebuilding', action='store_true',
                        help="Warn when an array or disk is Rebuilding, Initializing or "
                             "Verifying. You might want to do this to keep a closer eye on "
                             "things. Also, these conditions can affect performance, so you "
                             "might want to know that they are going on "
                             "(default: %(default)s)")
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0,
                        help="Verbose mode; repeat for more detail "
                             "(-vvv echoes raw tw_cli output)")
    parser.add_argument('-V', '--version', action='version', version=__version__)
    args = parser.parse_args()

    if args.drives_only and args.warn_rebuilding:
        parser.error("You cannot use the -d and -w switches together. Array warning states "
                     "are invalid when testing only drives.")
    if os.geteuid() != 0:
        exit_error("You must be root to run this plugin")

    _set_twcli_binary()

    if args.arrays_only:
        test_arrays(args.verbosity, args.warn_rebuilding)
    elif args.drives_only:
        test_drives(args.verbosity, args.warn_rebuilding)
    else:
        test_all(args.verbosity, args.warn_rebuilding)
    if args.incl_info:
        collect_controller(args.verbosity)
    exit_clean()


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print("Caught Control-C...")
        sys.exit(1)
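Unlike storcli.py, this collector renders the Prometheus exposition format by hand: add_metric() stores 'name{labels}' keys in METRICS, and exit_clean() prints each one prefixed with the tw_cli_ family name. A minimal, standalone sketch of that rendering, runnable without a 3ware controller (the label values below are hypothetical):

METRIC_PREFIX = 'tw_cli'

def render(metric, labels, value):
    # Mirrors add_metric()/exit_clean(): join k="v" pairs, wrap them in
    # braces, and prefix the configured metric family name.
    labelstr = ','.join('{}="{}"'.format(k, v) for k, v in labels.items())
    return '{}_{}{{{}}}\t{}'.format(METRIC_PREFIX, metric, labelstr, value)

print(render('array_status', {'controller': '0', 'unit': 0, 'state': 'OK'}, 1))
# -> 'tw_cli_array_status{controller="0",unit="0",state="OK"}\t1'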
prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/yum.sh
#!/usr/bin/env bash
#
# Description: Expose metrics from yum updates.
#
# Author: Slawomir Gonet
#
# Based on apt.sh by Ben Kochie

set -u -o pipefail

# shellcheck disable=SC2016
filter_awk_script='
BEGIN { mute=1 }
/Obsoleting Packages/ { mute=0 }
mute && /^[[:print:]]+\.[[:print:]]+/ { print $3 }
'

check_upgrades() {
  # check-update may wrap long package lines; xargs -n3 normalises the
  # output back to "name version repo" triples before filtering.
  /usr/bin/yum -q check-update |
    /usr/bin/xargs -n3 |
    awk "${filter_awk_script}" |
    sort |
    uniq -c |
    awk '{print "yum_upgrades_pending{origin=\""$2"\"} "$1}'
}

upgrades=$(check_upgrades)

echo '# HELP yum_upgrades_pending Yum package pending updates by origin.'
echo '# TYPE yum_upgrades_pending gauge'
if [[ -n "${upgrades}" ]]; then
  echo "${upgrades}"
else
  echo 'yum_upgrades_pending{origin=""} 0'
fi

# If yum-utils/dnf-utils is not installed, skip rendering this metric.
# needs-restarting -r exits 0 when no reboot is needed and 1 when it is.
if [[ -x /bin/needs-restarting ]]; then
  echo '# HELP node_reboot_required Node reboot is required for software updates.'
  echo '# TYPE node_reboot_required gauge'
  if /bin/needs-restarting -r >/dev/null 2>&1; then
    echo 'node_reboot_required 0'
  else
    echo 'node_reboot_required 1'
  fi
fi
prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/zfs-snapshots.py
#!/usr/bin/env python3

import os
import subprocess
from functools import reduce, partial
from itertools import groupby
from operator import itemgetter, add

from prometheus_client import CollectorRegistry, Gauge, generate_latest


def row_to_metric(metric: Gauge, row):
    return metric.labels(pool=row[0][0], volume=row[0][1]).set(row[1])


def collect_metrics(metric: Gauge, it) -> None:
    list(map(partial(row_to_metric, metric), it))


def zfs_parse_line(line):
    cols = line.split("\t")
    rest, snapshot = cols[0].rsplit("@", 1)
    pool = rest
    volume = None
    if "/" in rest:
        pool, volume = rest.split("/", 1)
        volume = "/" + volume
    return pool, volume, snapshot, *map(int, cols[1:])


def zfs_list_snapshots():
    cmd = [
        "zfs",
        "list",
        "-p",
        "-H",
        "-t",
        "snapshot",
        "-o",
        "name,used,creation",
    ]
    # zfs list can be relatively slow (a couple of seconds).
    # Use Popen to incrementally read from stdout so no further time is wasted.
    popen = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, env=dict(os.environ, LC_ALL="C")
    )
    # readline() yields bytes here, so the sentinel must be b"".
    for stdout_line in iter(popen.stdout.readline, b""):
        stdout_line = stdout_line.strip()
        if stdout_line == b"":
            break
        yield stdout_line.decode("utf-8")

    return_code = popen.wait()
    if return_code:
        raise subprocess.CalledProcessError(return_code, cmd)


def aggregate_rows(rows, index, operator):
    return map(
        lambda row: (row[0], reduce(operator, map(itemgetter(index), row[1]), 0)), rows
    )


NAMESPACE = "zfs_snapshot"
LABEL_NAMES = ["pool", "volume"]


def main():
    registry = CollectorRegistry()
    latest_time_metric = Gauge(
        "latest_time",
        "Timestamp of the latest snapshot",
        labelnames=LABEL_NAMES,
        namespace=NAMESPACE,
        registry=registry,
        unit="seconds",
    )
    space_used_metric = Gauge(
        "space_used",
        "Space used by snapshots in bytes",
        labelnames=LABEL_NAMES,
        namespace=NAMESPACE,
        registry=registry,
        unit="bytes",
    )

    snapshots = map(zfs_parse_line, zfs_list_snapshots())
    per_fs = list(
        map(
            lambda row: (row[0], list(row[1])), groupby(snapshots, lambda row: row[0:2])
        )
    )
    space_used = aggregate_rows(per_fs, -2, add)
    latest_time = aggregate_rows(per_fs, -1, max)

    collect_metrics(latest_time_metric, latest_time)
    collect_metrics(space_used_metric, space_used)

    print(generate_latest(registry).decode(), end="")


if __name__ == "__main__":
    main()
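zfs_parse_line() splits a tab-separated `zfs list -p -H` line into (pool, volume, snapshot, used, creation), where the volume keeps a leading "/" and is None for a pool-level snapshot. A quick illustration with fabricated input; because the file name contains a hyphen it cannot be imported directly, so this sketch loads it by path and assumes it is run from the directory containing zfs-snapshots.py:

import importlib.util

spec = importlib.util.spec_from_file_location("zfs_snapshots", "zfs-snapshots.py")
zfs_snapshots = importlib.util.module_from_spec(spec)
spec.loader.exec_module(zfs_snapshots)

# Fabricated `zfs list -p -H -o name,used,creation` lines:
print(zfs_snapshots.zfs_parse_line("rpool/data@daily-1\t1024\t1700000000"))
# -> ('rpool', '/data', 'daily-1', 1024, 1700000000)
print(zfs_snapshots.zfs_parse_line("rpool@manual\t0\t1690000000"))
# -> ('rpool', None, 'manual', 0, 1690000000)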
prometheus-node-exporter-collectors-0.0~git20241119.a2b43e1/zfs_zpool.sh
#!/usr/bin/env bash
#
# Script to give information about zpools and zfs datasets
# Author: Brian Candler
#
# NOTE: zpool metrics require "zpool list -p"
# (zfsonlinux 0.7.5 OK, 0.6.5.6 not OK)

set -eu

# which dataset types to show
DATASET_TYPES="filesystem,volume"

# metric name prefixes
ZPOOL="zfs_zpool"
DATASET="zfs_dataset"

# label names
ZPOOL_NAME="zpool_name"
DATASET_NAME="dataset_name"
DATASET_TYPE="dataset_type"

IFS=$'\t'

### zpool metadata ###
echo "# HELP ${ZPOOL} Constant metric with metadata about the zpool"
echo "# TYPE ${ZPOOL} gauge"
zpool list -H -o name,health,version,readonly,ashift,autoreplace,failmode |
  while read -r name health version readonly ashift autoreplace failmode; do
    echo "${ZPOOL}{${ZPOOL_NAME}=\"$name\",health=\"$health\",version=\"$version\",readonly=\"$readonly\",ashift=\"$ashift\",autoreplace=\"$autoreplace\",failmode=\"$failmode\"} 1"
  done

### zpool metrics ###
zpool_info="$(zpool list -Hp -o name,size,free,freeing,dedupratio,fragmentation 2>/dev/null)" &&
  [ -n "$zpool_info" ] &&
  while read -r col metric help; do
    echo "# HELP ${ZPOOL}_${metric} ${help}"
    echo "# TYPE ${ZPOOL}_${metric} gauge"
    while read -r -a line; do
      echo "${ZPOOL}_${metric}{${ZPOOL_NAME}=\"${line[0]}\"} ${line[$col]/%-/0}"
    done <<<"$zpool_info"
  done <<<$'1\tsize_bytes\tTotal size of the storage pool
2\tfree_bytes\tThe amount of free space available in the pool
3\tfreeing_bytes\tThe amount of space waiting to be reclaimed from destroyed filesystems or snapshots
4\tdedupratio\tThe deduplication ratio
5\tfragmentation\tThe amount of fragmentation in the pool'

### dataset metadata ###
echo "# HELP ${DATASET} Constant metric with metadata about the zfs dataset"
echo "# TYPE ${DATASET} gauge"
zfs list -Hp -t $DATASET_TYPES -o name,type,creation,mounted,mountpoint,checksum,compression,readonly,version,dedup,volblocksize |
  while read -r name type creation mounted mountpoint checksum compression readonly version dedup volblocksize; do
    echo "${DATASET}{$DATASET_NAME=\"$name\",$DATASET_TYPE=\"$type\",creation=\"$creation\",mounted=\"$mounted\",mountpoint=\"$mountpoint\",checksum=\"$checksum\",compression=\"$compression\",readonly=\"$readonly\",version=\"$version\",dedup=\"$dedup\",volblocksize=\"$volblocksize\"} 1"
  done

### dataset metrics ###
dataset_info="$(zfs list -Hp -t $DATASET_TYPES -o name,used,available,referenced,compressratio,reservation,refreservation,volsize)" &&
  [ -n "$dataset_info" ] &&
  while read -r col metric help; do
    echo "# HELP ${DATASET}_${metric} ${help}"
    echo "# TYPE ${DATASET}_${metric} gauge"
    while read -r -a line; do
      # change "-" to "0", and "1.43x" to "1.430"
      echo "${DATASET}_${metric}{${DATASET_NAME}=\"${line[0]}\"} ${line[$col]/%[x-]/0}"
    done <<<"$dataset_info"
  done <<<$'1\tused_bytes\tThe amount of space consumed by this dataset and all its descendents
2\tavailable_bytes\tThe amount of space available to the dataset and all its children
3\treferenced_bytes\tThe amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool
4\tcompressratio\tFor non-snapshots, the compression ratio achieved for the used space of this dataset, expressed as a multiplier
5\treservation_bytes\tThe minimum amount of space guaranteed to a dataset and its descendants
6\trefreservation_bytes\tThe minimum amount of space guaranteed to a dataset, not including its descendents
7\tvolsize_bytes\tFor volumes, specifies the logical size of the volume'
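The one non-obvious step in the dataset loop is the value normalisation ${line[$col]/%[x-]/0}: `zfs list -p` prints "-" for fields that do not apply and suffixes compressratio with "x", and the trailing-character substitution turns both into plain numbers that Prometheus can parse. A small Python rendering of the same rule, for illustration only:

import re

def normalize(value: str) -> str:
    # Mirror the bash substitution ${value/%[x-]/0}: replace a trailing
    # "x" or "-" with "0", so "-" -> "0" and "1.43x" -> "1.430".
    return re.sub(r'[x-]$', '0', value)

assert normalize('-') == '0'
assert normalize('1.43x') == '1.430'
assert normalize('1048576') == '1048576'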