pax_global_header 0000666 0000000 0000000 00000000064 12353073334 0014515 g ustar 00root root 0000000 0000000 52 comment=bd5d834cfa719cfadc9473353e21469f0f5398b5
syslog-nagios-bridge/ 0000775 0000000 0000000 00000000000 12353073334 0015111 5 ustar 00root root 0000000 0000000 syslog-nagios-bridge/COPYING 0000664 0000000 0000000 00000001505 12353073334 0016145 0 ustar 00root root 0000000 0000000 # syslog-nagios-bridge - transfer Syslog events to Nagios checkresults file
#
# Project page: https://github.com/dpocock/syslog-nagios-bridge
#
# Copyright (C) 2014 Daniel Pocock http://danielpocock.com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
syslog-nagios-bridge/README.txt 0000664 0000000 0000000 00000004725 12353073334 0016617 0 ustar 00root root 0000000 0000000
syslog-nagios-bridge
Copyright (C) 2014 Daniel Pocock http://danielpocock.com
https://github.com/dpocock/syslog-nagios-bridge
Dependencies
------------
python-netsyslog
https://github.com/dpocock/python-netsyslog
pynag
http://pynag.org
(using latest code from Git, June 2014,
with the new Utils.CheckResult support)
Installation
------------
Copy syslog-nagios-bridge.py to a suitable location (e.g. /usr/local/bin)
Copy the configuration file to a suitable location (e.g. /etc/nagios3)
Update the config file settings
Edit your syslog daemon's configuration to make it send events to syslog-nagios-bridge over TCP.
For example, append the following to /etc/rsyslog.conf:
# for rsyslog >= v7.x:
#action(type="omfwd" Target="127.0.0.1" Port="30514" Protocol="tcp" TCP_Framing="octet-counted")
# for rsyslog < v7.x
*.* @@127.0.0.1:30514
Make sure the port number matches the "bind_port" in syslog-bridge.conf
syslog-nagios-bridge.py automatically creates service definitions for each syslog
tag name that it detects. It can put them directly into the nagios configuration directories
or it can generate them in some other place and you can copy them over manually.
In any case, for Nagios to report on a particular host/syslog tag, there must be
a corresponding service definition in /etc/nagios3/conf.d/whatever.cfg. To alert
on errors from the httpd process, you may use the following:
define service{
use generic-service
host_name myhost
service_description httpd - SysLog
check_command return-unknown
active_checks_enabled 0
passive_checks_enabled 1
max_check_attempts 1
}
After doing the configuration, start the bridge and restart/reload the
syslog daemon and Nagios itself:
# su - nagios -c /usr/local/bin/syslog-nagios-bridge.py
# service rsyslog restart
# service nagios3 reload
The relevant services will go into the CRITICAL state after error events
are detected by syslog-nagios-bridge. Nagios has no way to know when
the logs have been checked and whether anybody has taken action to
correct the errors. Consequently, the services will remain in the CRITICAL
state indefinitely. A user must go into the Nagios web interface
and use the option "Submit passive check result for this service"
to put the service back in the OK state. Normally this is only done
after manually investigating the error.
syslog-nagios-bridge/syslog-bridge.conf 0000664 0000000 0000000 00000003713 12353073334 0020536 0 ustar 00root root 0000000 0000000
# Log file for our own activity. If not specified, syslog is used.
# (take care to avoid a feedback loop!)
log_file = "/var/log/nagios3/syslog-bridge.log"
# Default level is WARNING
# (take care to avoid a feedback loop!)
#log_level = logging.DEBUG
# The TCP port where we listen for syslog events
bind_port = 30514
# The check_result_path configured in nagios.cfg:
checkresult_dir = "/var/lib/nagios3/spool/checkresults"
# A directory where generated service definition files can be
# placed.
svc_def_dir = "/etc/nagios3/syslog.d"
# Generated service definitions should inherit from this template:
svc_tmpl = "generic-service"
# Specify a check command for inclusion in the service definition.
# The command is never actually executed as the service checks are
# only passive. Can be a simple script that always returns 3 (UNKNOWN)
svc_check_dummy = "return-unknown"
# Threshold for syslog events to generate Nagios checkresults
# Choose between LOG_WARNING or LOG_ERR
#svc_state_threshold = syslog.LOG_WARNING
svc_state_threshold = syslog.LOG_ERR
# We want to avoid flooding Nagios with multiple checkresults for a single
# service if there are hundreds of log entries per second.
# Therefore, after submitting a checkresult, we ignore any further events
# for the same service during the subsequent period svc_submission_interval
# (in seconds)
svc_submission_interval = 10
# RFC3164 hostnames usually do not have the domain part.
# RFC5424 hostnames usually do have the domain part (FQDN).
# In Nagios, people normally use the short name/alias and not the FQDN.
# Setting this option ensures that hostnames are normalized for submission
# to Nagios:
hostname_strip_fqdn = True
# If LogAnalyzer is available, the Nagios service definitions can include
# a link to the specific log query for the given host/tag. This link
# will be displayed in the Nagios web UI so the user can click to
# go directly from Nagios to LogAnalyzer.
#loganalyzer_url = "http://log-host/loganalyzer/"
syslog-nagios-bridge/syslog-nagios-bridge.py 0000775 0000000 0000000 00000021131 12353073334 0021514 0 ustar 00root root 0000000 0000000 #!/usr/bin/python
#
# syslog-nagios-bridge - transfer Syslog events to Nagios checkresults file
#
# Project page: https://github.com/dpocock/syslog-nagios-bridge
#
# Copyright (C) 2014 Daniel Pocock http://danielpocock.com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
############################################################################
import argparse
import logging
import logging.handlers
import netsyslog
import os
import Queue
import re
from threading import Thread
import sys
import syslog
import time
import urllib
from pynag.Utils import CheckResult
# Frames received by the collector handler are put on this queue and
# taken off again by the main loop.
q = Queue.Queue()
# In-memory cache used by lookup_app():
# hostname -> {tag -> properties dict for that host/tag pair}
hosts = {}
# default values (the config file may override them)
# Path of our own log file; when None, a SysLogHandler is used instead.
log_file = None
# Level applied to the root logger at startup.
log_level = logging.WARNING
# Base URL of LogAnalyzer; when None, no action_url is written into
# generated service definitions.
loganalyzer_url = None
# This is a subclass of the SyslogTCPHandler from the netsyslog module.
# It receives a notification (call to handle_message) each time a
# syslog event arrives from the network and it puts them into a queue
# for processing on the main thread.
class MyHandler(netsyslog.SyslogTCPHandler):
    """Collector handler that passes received syslog frames to the main thread.

    handle_message() is called for each parsed syslog frame.  The frame is
    traced at DEBUG level and then put on the module-level queue ``q``.
    """

    def handle_message(self, frame):
        """Handle parsed Syslog frames.

        frame -- the parsed frame; its ``pri``, ``header`` and ``msg``
                 attributes are read for the debug trace.
        """
        # The values are passed as logging arguments rather than being
        # %-formatted here: the message is then only built when DEBUG is
        # actually enabled, and a value that does not fit the format
        # cannot prevent the frame from being queued below.
        logging.getLogger(__name__).debug(
            "severity: %d, facility: %d, tag: %s, PID: %s, host: %s, ts: %s, content: %s",
            frame.pri.severity,
            frame.pri.facility,
            frame.msg.tag,
            frame.msg.pid,
            frame.header.hostname,
            frame.header.timestamp,
            frame.msg.content)
        # queue the frame for examination by the main thread
        q.put(frame)
# make sure host names don't contain domain parts
# (some bad syslog implementations send domain parts)
# normalize to lowercase
def clean_host_name(hostname):
    """Normalize the hostname taken from a syslog header.

    Returns None when there is no usable host name: None, "", "-", or
    nothing left in front of the first dot.

    When the ``hostname_strip_fqdn`` setting is true, the domain part is
    dropped and the short name is lower-cased.  A dotted-quad IPv4
    address is returned whole, because cutting it at the first dot would
    leave only its first octet.  When the setting is false the hostname
    is returned unchanged.
    """
    if hostname is None or hostname in ("", "-"):
        return None
    if not hostname_strip_fqdn:
        return hostname
    labels = hostname.split(".")
    # Four all-numeric labels: an IPv4 address, not a name with a domain.
    if len(labels) == 4 and all(label.isdigit() for label in labels):
        return hostname
    # An empty short name (e.g. ".example.com") is not a usable host name.
    return labels[0].lower() or None
# make sure tag names don't contain illegal characters
# Runs of characters that are not allowed in a tag name.
_ILLEGAL_TAG_CHARS = re.compile(r"\W+")


def clean_tag_name(tag):
    """Strip illegal characters from a syslog tag name.

    Returns None when there is no usable tag: None, "", "-", or a tag
    that has no characters left once the illegal ones are removed.
    Otherwise returns the tag with every non-word character removed; a
    warning is logged whenever something had to be removed.
    """
    if tag is None or tag in ("", "-"):
        return None
    # FIXME - use something more efficient than a regular expression
    _tag = _ILLEGAL_TAG_CHARS.sub("", tag)
    if tag != _tag:
        # Look the logger up here instead of relying on a global that is
        # only created when the file is run as a script.
        logging.getLogger(__name__).warning("detected invalid tag name: %s", tag)
    # Callers test for None, so never hand back an empty tag.
    return _tag or None
def make_desc(hostname, tag):
    """Build the Nagios service description for a syslog tag.

    The returned name has to match the service description in the
    Nagios configuration exactly.  Only ``tag`` contributes to the
    result; ``hostname`` is accepted but not used.
    """
    suffix = " - SysLog"
    return "".join((tag, suffix))
def _write_service_def(svc_def_filename, hostname, tag):
    """Write a passive Nagios service definition for hostname/tag.

    All lines are built before the file is opened, so a failure while
    formatting them cannot leave a half-written definition behind.
    """
    # FIXME: can pynag create the service def through the API?
    svc_desc = make_desc(hostname, tag)
    lines = [
        "define service{\n",
        " use %s\n" % svc_tmpl,
        " host_name %s\n" % hostname,
        " service_description %s\n" % svc_desc,
        " # this is never really executed because active_checks_enabled=0:\n",
        " check_command %s\n" % svc_check_dummy,
        " active_checks_enabled 0\n",
        " passive_checks_enabled 1\n",
        " # generate email notifications after first error:\n",
        " max_check_attempts 1\n",
    ]
    if loganalyzer_url is not None:
        # link to the LogAnalyzer query for this host/tag
        search_query = "syslogtag:=%s source:=%s" % (tag, hostname)
        action_url = "%s?filter=%s" % (loganalyzer_url, urllib.quote(search_query))
        lines.append(" action_url %s\n" % action_url)
    lines.append(" }\n")
    with open(svc_def_filename, "w") as f:
        f.writelines(lines)


def lookup_app(hostname, tag):
    """Lookup the properties for the tag/application.

    Look through our in-memory cache (``hosts``) for the properties
    related to the tag/application on the given host, creating the
    entry on first sight.  The first time a host/tag pair is seen, and
    if ``svc_def_dir`` is set, a service definition file is created in
    that directory unless one already exists.

    Returns the properties dict for the pair.  It is the cached object
    itself, so changes made by the caller are remembered.
    """
    logger = logging.getLogger(__name__)

    # Membership is tested on the dicts themselves: going through
    # .keys() builds and scans a list on every event in Python 2.
    if hostname not in hosts:
        hosts[hostname] = {}
        logger.debug("first event from host: %s", hostname)
    _host = hosts[hostname]

    if tag in _host:
        return _host[tag]

    _app = {}
    _host[tag] = _app
    logger.debug("first event from tag: %s", tag)

    if svc_def_dir is not None:
        # see if we need to create a service definition for the tag
        _filename = "syslog_%s_%s.cfg" % (hostname, tag)
        svc_def_filename = os.path.join(svc_def_dir, _filename)
        if not os.path.exists(svc_def_filename):
            logger.debug("creating service def for host %s, tag %s", hostname, tag)
            _write_service_def(svc_def_filename, hostname, tag)
    return _app
def handle_frame(frame):
    """Handle a SysLog event.

    Decides whether the event should generate a Nagios checkresult and,
    if so, submits one.  Events without a usable hostname or tag are
    ignored, as are events whose severity is beyond
    ``svc_state_threshold`` and events arriving less than
    ``svc_submission_interval`` seconds after the last checkresult sent
    for the same host/tag.
    """
    log = logging.getLogger(__name__)

    # Work out which host/tag pair this event belongs to.
    host = clean_host_name(frame.header.hostname)
    if host is None:
        log.debug("bad or missing hostname, ignoring message")
        return

    tag = clean_tag_name(frame.msg.tag)
    if tag is None:
        log.debug("bad or missing tag, ignoring message")
        return

    app = lookup_app(host, tag)

    # Nothing to report unless the event is at least as severe as the
    # configured threshold (lower numbers are more severe).
    if not (frame.pri.severity <= svc_state_threshold):
        return

    if "last_event" in app:
        if (app["last_event"] + svc_submission_interval) > time.time():
            # ignore multiple error events within svc_submission_interval
            # seconds after the last checkresult was sent to Nagios
            return

    log.debug("Must tell Nagios")
    check_result = CheckResult(checkresult_dir)
    desc = make_desc(host, tag)
    output = "PID=%s, logged: %s" % (frame.msg.pid, frame.msg.content)
    # return code 1 for LOG_WARNING, 2 for LOG_ERR and worse
    ret = 1 if frame.pri.severity == syslog.LOG_WARNING else 2
    check_result.service_result(
        host,
        desc,
        return_code=ret,
        output=output,
        check_type=1,
        check_options=0,
        scheduled_check=0,
        reschedule_check=0,
        latency=0.1,
        exited_ok=1)
    check_result.submit()
    # remember when we last told Nagios about this host/tag
    app["last_event"] = time.time()
# main program code
if __name__ == '__main__':
    # Everything below deliberately stays at module level: the settings
    # loaded from the config file, and ``logger``, are read as module
    # globals by the functions in this file.
    try:
        # parse command line
        parser = argparse.ArgumentParser(description="receive Syslog events and generate Nagios check results file")
        parser.add_argument(
            "config_file",
            nargs="?",
            help="configuration file",
            default="/etc/nagios3/syslog-bridge.conf")
        args = parser.parse_args()

        # read the configuration file
        # NOTE(security): the configuration file is executed as Python
        # code.  Only point this at a file that untrusted users cannot
        # write to.
        execfile(args.config_file)

        # Setup logging.
        # *** Be careful not to create a feedback loop ***
        logger = logging.getLogger()
        if log_file is not None:
            logger.addHandler(logging.FileHandler(log_file))
        else:
            logger.addHandler(logging.handlers.SysLogHandler())
        logger.setLevel(log_level)

        # Run the Collector in a thread to listen for incoming connections
        c = netsyslog.Collector(bind_port, MyHandler)
        thread = Thread(target=c.run)
        thread.daemon = True
        thread.start()

        while True:
            try:
                # we set a timeout for Queue.get() so that it can be
                # interrupted by ctrl-C. See issue no. 1360
                # http://bugs.python.org/issue1360
                frame = q.get(True, 1)
            except Queue.Empty:
                continue
            logger.debug("got a frame from the queue")
            try:
                handle_frame(frame)
            except Exception as e:
                # one bad event must not stop the bridge
                logger.error("Failed to handle an event: %s" % e)
    except Exception as e:
        logging.error("Unexpected failure: %s" % e)
        # Report the failure to whoever started us instead of exiting
        # with status 0.
        sys.exit(1)