irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/000077500000000000000000000000001267153440000204015ustar00rootroot00000000000000irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/.gitignore000066400000000000000000000001271267153440000223710ustar00rootroot00000000000000# Git clutter *.orig # Python bits /*.pyc # Man Pages /*.8 /*.1 # HTML Docs /*.html irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/COPYING000066400000000000000000000024361267153440000214410ustar00rootroot00000000000000 BSD LICENSE Copyright (c) 2015, Eric S. Raymond All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/Makefile000066400000000000000000000064771267153440000220570ustar00rootroot00000000000000
# Makefile for the irker relaying daemon

# Version string is scraped from the `version = "..."` line of irkerd.
VERS := $(shell sed -n 's/version = "\(.\+\)"/\1/p' irkerd)
# Empty when pkg-config or systemd is unavailable; the systemd unit is
# then neither installed nor uninstalled (see the ifneq guards below).
SYSTEMDSYSTEMUNITDIR := $(shell pkg-config --variable=systemdsystemunitdir systemd)

# `prefix`, `mandir` & `DESTDIR` can and should be set on the command
# line to control installation locations
prefix ?= /usr
mandir ?= /share/man
target = $(DESTDIR)$(prefix)

# None of these names is a file produced by its recipe.
.PHONY: docs install uninstall clean pylint loc version dist release refresh

docs: irkerd.html irkerd.8 irkerhook.html irkerhook.1 irk.html irk.1

irkerd.8: irkerd.xml
	xmlto man $<
irkerd.html: irkerd.xml
	xmlto html-nochunks $<

irkerhook.1: irkerhook.xml
	xmlto man $<
irkerhook.html: irkerhook.xml
	xmlto html-nochunks $<

irk.1: irk.xml
	xmlto man $<
# Build from irk.xml; this recipe used to render irkerhook.xml by mistake,
# so irk.html was never actually generated from its own source.
irk.html: irk.xml
	xmlto html-nochunks $<

install.html: install.txt
	asciidoc -o $@ $<
security.html: security.txt
	asciidoc -o $@ $<
hacking.html: hacking.txt
	asciidoc -o $@ $<

# `uninstall` is a prerequisite so that files from an earlier install are
# removed before the new ones are copied into place.
# Directories and the daemon are mode 755; the unit file and the manual
# pages are plain data and are installed non-executable (644).
install: irk.1 irkerd.8 irkerhook.1 uninstall
	install -m 755 -o 0 -g 0 -d "$(target)/bin"
	install -m 755 -o 0 -g 0 irkerd "$(target)/bin/irkerd"
ifneq ($(strip $(SYSTEMDSYSTEMUNITDIR)),)
	install -m 755 -o 0 -g 0 -d "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)"
	install -m 644 -o 0 -g 0 irkerd.service "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)"
endif
	install -m 755 -o 0 -g 0 -d "$(target)$(mandir)/man8"
	install -m 644 -o 0 -g 0 irkerd.8 "$(target)$(mandir)/man8/irkerd.8"
	install -m 755 -o 0 -g 0 -d "$(target)$(mandir)/man1"
	install -m 644 -o 0 -g 0 irkerhook.1 "$(target)$(mandir)/man1/irkerhook.1"
	install -m 644 -o 0 -g 0 irk.1 "$(target)$(mandir)/man1/irk.1"

uninstall:
	rm -f "$(target)/bin/irkerd"
ifneq ($(strip $(SYSTEMDSYSTEMUNITDIR)),)
	rm -f "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)/irkerd.service"
endif
	rm -f "$(target)$(mandir)/man8/irkerd.8"
	rm -f "$(target)$(mandir)/man1/irkerhook.1"
	rm -f "$(target)$(mandir)/man1/irk.1"

clean:
	rm -f irkerd.8 irkerhook.1 irk.1 irker-*.tar.gz *~ *.html

PYLINTOPTS = --rcfile=/dev/null --reports=n \
	--msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
	--dummy-variables-rgx='^_'
SUPPRESSIONS = "C0103,C0111,C0301,C0302,C0330,C1001,R0201,R0902,R0903,R0912,R0913,R0914,R0915,E1101,W0142,W0201,W0212,W0621,W0702,W0703,W1201,F0401,E0611"
pylint:
	@pylint $(PYLINTOPTS) --disable=$(SUPPRESSIONS) irkerd
	@pylint $(PYLINTOPTS) --disable=$(SUPPRESSIONS) irkerhook.py

# Raw line count, then the count of lines that are neither blank nor
# comment-only ($$ passes a literal $ through to grep).
loc:
	@echo "LOC:"; wc -l irkerd irkerhook.py
	@echo -n "LLOC: "; grep -vE '(^ *#|^ *$$)' irkerd irkerhook.py | wc -l

DOCS = \
	README \
	COPYING \
	NEWS \
	install.txt \
	security.txt \
	hacking.txt \
	irkerhook.xml \
	irkerd.xml \
	irk.xml

SOURCES = \
	$(DOCS) \
	irkerd \
	irkerhook.py \
	filter-example.py \
	filter-test.py \
	irk \
	Makefile

EXTRA_DIST = \
	org.catb.irkerd.plist \
	irkerd.service \
	irker-logo.png

version:
	@echo $(VERS)

# Stage the distribution files in a scratch directory, archive it, and
# remove the scratch directory again.
irker-$(VERS).tar.gz: $(SOURCES) irkerd.8 irkerhook.1 irk.1
	mkdir irker-$(VERS)
	cp -pR $(SOURCES) $(EXTRA_DIST) irker-$(VERS)/
	@COPYFILE_DISABLE=1 tar -cvzf irker-$(VERS).tar.gz irker-$(VERS)
	rm -fr irker-$(VERS)

# The checksum is computed from the tarball, so it must depend on it;
# without the prerequisite a parallel build can run md5sum too early.
irker-$(VERS).md5: irker-$(VERS).tar.gz
	@md5sum irker-$(VERS).tar.gz >irker-$(VERS).md5

dist: irker-$(VERS).tar.gz irker-$(VERS).md5

WEBDOCS = irkerd.html irk.html irkerhook.html install.html security.html hacking.html

release: irker-$(VERS).tar.gz irker-$(VERS).md5 $(WEBDOCS)
	shipper version=$(VERS) | sh -e -x

refresh: $(WEBDOCS)
	shipper -N -w version=$(VERS) | sh -e -x
irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/NEWS000066400000000000000000000125211267153440000211010ustar00rootroot00000000000000 irker history 2.17: 2016-03-14 Add a reconnect delay (Debian bug #749650). Add proxy support (requires setting some variables in the source file). Use git abbreviated hash to address Debian complaints. 2.16: 2016-02-18 Code now runs under either Python 2 or Python 3 2.15: 2016-01-12 Emergency backout of getaddrinfo, it randomly hangs. 2.14: 2016-01-12 Lookup with getaddrinfo allows use with IPv6. 
Documentation improvements. 2.13: 2015-06-14 SSL validation fix. Hardening against Unicode decode errors. irk becomes a library so it can be re-used. 2.12: 2014-10-22 Catch erroneous UTF-8 or non-UTF-8 from servers. Also autodetect the right logging device under FreeBSD: /var/run/syslog 2.11: 2014-06-20 With -i, message string argument now optional, stdin is read if it is absent. Auto-adapt to BSD & OS X log device as well as Linux's. 2.10: 2014-06-19 irk no longer fails on ircs channel URLs. 2.9: 2014-06-01 If irkerd is running in background, log to /dev/syslog (facility daemon). New -H option to set host listening address. Add support for using CertFP to auth to the IRC server, and document it. 2.8: 2014-05-30 Various minor improvements to irk. Cope better with branch names containing slashes. 2.7: 2014-03-15 Add support for ircs:// and SSL/TLS connections to IRC servers. Add support for per-URL usernames and passwords. 2.6: 2014-02-04 Fix for an infinite loop on failing to connect to IRC 2.5: 2013-12-24 Bug fix - remove a deadlock we inherited from irclib. 2.4: 2013-12-03 Bug fix release - some users reported failure to connect with 2.3. Also prevent a crash if Unicode shows up in the wrong place. 2.3: 2013-11-30 -i option enables immediate sending of one line in foreground. 2.2: 2013-11-29 Fixed Unicode processing - got busted in 2.0 when irclib was removed. Show Python traceback on higher debug levels. 2.1: 2013-11-26 A performance improvement in the git repository hook. Documentation polishing. 2.0: 2013-11-16 The dependency on irclib is gone. An email delivery method, suitable for use on SourceForge. irkerhook can now be used as a hg changegroup hook. Prevent misbehavior on UTF-8 in commit metadata. Fix a crash bug on invalid hostnames. 1.20: 2013-05-17 Compatibility back to Python 2.4 (provided simplejson is present). Increased anti-flood delay to avoid trouble with freenode. 
1.19: 2013-05-06 Fixed a minor bug in argument processing 1.18: 2013-04-16 Added -l option; irker can now be used as a channel monitor. Added -n and -p options: the nick can be forced and authenticated. 1.17: 2013-02-03 Various minor fixes and bulletproofing. 1.16: 2013-01-24 Deal gracefully with non-ASCII author names and '|' in the command line. 1.15: 2012-12-08 Don't append an extra newline in the Subversion hook. 1.14: 2012-11-26 irclib 5.0 and urlparse compatibility fixes. 1.13: 2012-11-06 Fix for a very rare thread race found by AI0867. Work around a misdesign in the IRC library. 1.12: 2012-10-11 Emergency workaround for a Unicode-handling error buried deep in irclib. The IRC library at version 3.2 or later is required for this version! Only ship to freenode #commits by default. 1.11: 2012-10-10 Code is now fully Unicode-safe. A 'cialike' option emulates the file-summary behavior on the old CIA service. 1.10: 2012-10-09 Expire disconnected connections if they aren't needed or can't reconnect. Eventlet support removed - didn't play well with the library mutex. 1.9: 2012-10-08 Proper mutex locks prevent an occasional thread crash on session timeout. There's now systemd installation support for irkerd. 1.8: 2012-10-06 It's now possible to send to nick URLs. Cope gracefully if an IRC server dies or hangs during the nick handshake. 1.7: 2012-10-05 Optional metadata filtering with a user-specified command. irkerd code is now armored against IRC library errors in the delivery threads. 1.6: 2012-10-04 In 1.5 trying to appease pylint broke the Mercurial hook. Added credits for contributors in hacking.txt. Fix the aging out of connections when we hit a resource limit. 1.5: 2012-10-03 Mercurial support. Shorten nick negotiation by choosing a random nick base from a large range. Make irkerd exit cleanly on control-C. 1.4: 2012-10-02 Graceful handling of server disconnects and kicks. Distribution now includes an installable irkerd plist for Mac OS/X. 
The color variable is no longer boolean; may be mIRC or ANSI. The installation instructions for irkerhook.py have changed! 1.3: 2012-10-01 Support for an irker.conf file to set irkerhook variables under Subversion. Color highlighting of notification fields can be enabled. irkerhook.py now has its own manual page. Added channelmax variable for rate-limiting. irkerd now uses green threads, with much lower overhead. Fix a bug in handling of channel names with no prefix. 1.2: 2012-09-30 All segments of a message with embedded newlines are now transmitted. Message reduction - irkerhook drops the filelist on excessively long ones. Shell quote hardening in irkerhook.py and some anti-DoS logic. 1.1: 2012-09-28 Add a delay to avoid threads spinning on the empty-queue-check, eating CPU. Fix a bug in reporting of multi-file commits. 1.0: 2012-09-27 First production version, somewhat rushed by the sudden death of cia.vc on 24 September. irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/README000066400000000000000000000016051267153440000212630ustar00rootroot00000000000000 irker - submission tools for IRC notifications irkerd is a specialized IRC client that runs as a daemon, allowing other programs to ship IRC notifications by sending JSON objects to a listening socket. It is meant to be used by hook scripts in version-control repositories, allowing them to send commit notifications to project IRC channels. A hook script, irkerhook.py, supporting git, hg, and Subversion is included in the distribution; see the install.txt file for installation instructions. The advantage of using this daemon over individual scripted sends is that it can maintain connection state for multiple channels, avoiding obnoxious join/leave spam. The file install.txt describes how to install the software safely, so it can't be used as a spam conduit. Please read the files security.txt and hacking.txt before modifying this code. Eric S. 
Raymond September 2012 irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/control000066400000000000000000000014601267153440000220050ustar00rootroot00000000000000# This is not a real Debian control file, though the syntax is compatible. # It's project metadata for the shipper tool Package: irker Description: An IRC client that runs as a daemon accepting notification requests. You preesnt them JSON objects presented to a listening socket. It is meant to be used by hook scripts in version-control repositories, allowing them to send commit notifications to project IRC channels. A hook script that works with git, hg, and svn is included in the distribution. #XBS-Destinations: freshcode Homepage: http://www.catb.org/~esr/irker XBS-HTML-Target: index.html XBS-Repository-URL: https://gitlab.com/esr/irker XBS-OpenHub-URL: http://www.openhub.net/p/irker XBS-IRC-Channel: irc://chat.freenode.net/#irker XBS-Logo: irker-logo.png XBS-VC-Tag-Template: %(version)s irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/filter-example.py000077500000000000000000000005201267153440000236710ustar00rootroot00000000000000#!/usr/bin/env python # This is a trivial example of a metadata filter. # All it does is change the name of the commit's author. # It could do other things, including modifying the # channels list # import sys, json metadata = json.loads(sys.argv[1]) metadata['author'] = "The Great and Powerful Oz" print json.dumps(metadata) # end irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/filter-test.py000077500000000000000000000023151267153440000232210ustar00rootroot00000000000000#!/usr/bin/env python # # Test hook to launch an irker instance (if it doesn't already exist) # just before shipping the notification. We start it in in another terminal # so you can watch the debug messages. Intended to be used in the root # directory of the irker repo. Probably only of interest only to irker # developers # # To use this, set up irkerhook.py to fire on each commit. 
Creating a # .git/hooks/post-commit file containing the line "irkerhook.py"; be # sure to make the opos-commit file executable. Then set the # filtercmd variable in your repo config as follows: # # [irker] # filtercmd = filter-test.py import os, sys, json, subprocess, time metadata = json.loads(sys.argv[1]) ps = subprocess.Popen("ps -U %s uh" % os.getenv("LOGNAME"), shell=True, stdout=subprocess.PIPE) data = ps.stdout.read() irkerd_count = len([x for x in data.split("\n") if x.find("irkerd") != -1]) if irkerd_count: sys.stderr.write("Using a running irker instance...\n") else: sys.stderr.write("Launching a new irker instance...\n") os.system("gnome-terminal --title 'irkerd' -e 'irkerd -d 2' &") time.sleep(1.5) # Avoid a race condition print json.dumps(metadata) # end irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/hacking.txt000066400000000000000000000051301267153440000225450ustar00rootroot00000000000000= Hacker's Guide to irker = == Design philosopy == Points to you if some of this seems familiar from GPSD... === Keep mechanism and policy separate === Mechanism goes in irkerd. Policy goes in irkerhook.py irkerd is intended to be super-simple and completely indifferent to what content passes through it. It doesn't know, in any sense, that the use-case it was designed for is broadcasting notifications from version control systems. irkerhook.py is the part that knows about how to mine data from repositories and sets the format of notifications. === If you think the mechanism needs an option, think again === Because irkerhook.py does policy, it takes policy options. Because irkerd is pure mechanism, it shouldn't need any. If you think it does, you have almost certainly got a bug in your thinking. Fix that before you modify code. === Never configure what you can autoconfigure === Human attention is more expensive than machine time. Humans are careless and failure-prone. 
Therefore, whenever you make a user tell your code something the code can deduce for itself, you are introducing unnecessary inefficiency and unnecessary failure modes. This, in particular, is why irkerhook.py doesn't have a repository type switch. It can deduce the repo type by looking, so it should. == Release procedure == 1. Check for merge requests at the repository. 2. Do 'make pylint' to audit the code. 3. Run irk with a sample message; look at #irker on freenode to verify. 4. Bump the version numbers in irkerd and irkerhook.py 5. Update the NEWS file 6. git commit -a 7. make release == Thanks where due == Alexander van Gessel (AI0867) contributed the Subversion support in irkerhook.py. Since the 1.0 release he has kept as close an eye on the code as the author and has fixed at least as many bugs. //W. here causes asciidoc to see this as a list entry. W Trevor King added SSL/TLS support and did significant refactoring work. Daniel Franke performed a security audit of irkerd. Georg Brandl contributed the Mercurial support in irkerhook.py and explained how to make Control-C work right. Laurent Bachelier fixed the Makefile so it wouldn't break stuff and wrote the first version of the external filtering option. dak180 (name withheld by request) wrote the OS X launchd plist. Wulf C. Krueger wrote the systemd installation support. Other people on the freenode #irker channel (Kingpin, fpcfan, shadowm, Rick) smoked out bugs in irkerd before they could seriously bug anybody. irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/install.txt000066400000000000000000000072211267153440000226120ustar00rootroot00000000000000= Forge installation instructions = irker and irkerhook.py are intended to be installed on forge sites such as SourceForge, GitHub, GitLab, Gna, and Savannah. This file explains the theory of operation, how to install the code, and how to test it. 
== Theory of operation == irkerhook.py creates JSON notification requests and ships them to irkerd's listener socket. irkerd runs as a daemon in order to maintain all the client state required to post multiple notifications while generating a minimum of join/leave messages (which, from the point of view of humans watching irkerd's output, are mere spam). See the security.txt document for a detailed discussion of security and DoS vulnerabilities related to irker. The short version: as long as your firewall blocks port 6659 and irkerd is running inside it, you should be fine. == Prerequisites == You will need either 1. Python at version 2.6 or later, which has JSON built in 2. Python at version no older than 2.4, and a version of the simplejson library installed that it can use. Some newer versions of simplejson discard 2.4 compatibility; 2.0.9 is known to work. == Installing irkerd == irker needs to run constantly, watching for TCP and UDP traffic on port 6659. Install it accordingly. It has no config file; you can just start it up with no arguments. If you want to see what it's doing, give it command-line options -d info for sparse messages and -d debug to show all traffic with IRC servers. You should *not* make irker visible from outside the site firewall, as it can be used to spam IRC channels while masking the source address. The firewall should block port 6659. The design of irker assumes the machine on which it is running is also inside the firewall, so that repository hooks can reach port 6659. The file org.catb.irkerd.plist is a Mac OS/X plist that can be installed to launch irkerd as a boot-time service on that system. == Installing irkerhook.py == Under git, a call to irkerhook.py should be installed in the update hook script of your repo. Under Subversion, the call goes in your repo's post-commit script. Under Mercurial there are two different ways to install it. See the irkerhook manual page for details; the source is irkerhook.xml in this distribution. 
Note that if you were using the CIA service and have ciabot.py in your git update script, you can simply replace this /path/to/ciabot.py ${refname} $(git rev-list ${oldhead}..${newhead} | tac) with this: /path/to/irkerhook.py --refname=${refname} $(git rev-list ${oldhead}..${newhead} | tac) SourceForge is a special case: see https://github.com/AI0867/sf-git-irker-pipeline for tools and instructions on how to work around its limitations. == Testing == To verify that your repo produces well-formed JSON notifications, you can run irkerhook.py in the repo directory using the -n switch, which emits JSON to standard output rather than attempting to ship to an irkerd instance. Then, start irkerd and call irkerhook.py while watching the freenode #commits channel. The 'irk' script is a little test tool that takes two arguments, a channel and a message, and does what you'd expect. If you need help, there's a project chat channel at irc://chat.freenode.net/#irker == Read-only access == If, for whatever reason, you can't modify the hook scripts in your repository, there is still hope. There's a proxy that takes CIA XML-RPC notifications and passes them to a local irker instance. Find it here: https://github.com/nenolod/irker-cia-proxy There's also a poller daemon that can watch activity in a Subversion repository and ship notifications via an irker instance. https://github.com/shikadilord/irker-svnpoller irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/irk000077500000000000000000000025351267153440000211210ustar00rootroot00000000000000#!/usr/bin/env python # Illustrates how to test irkerd. # # First argument must be a channel URL. If it does not begin with "irc", # the base URL for freenode is prepended. # # Second argument must be a payload string. Standard C-style escapes # such as \n and \t are decoded. 
# # SPDX-License-Identifier: BSD-2-Clause import json import socket import sys import fileinput DEFAULT_SERVER = ("localhost", 6659) def connect(server = DEFAULT_SERVER): return socket.create_connection(server) def send(s, target, message): data = {"to": target, "privmsg" : message} #print(json.dumps(data)) s.sendall(bytes(json.dumps(data, encoding="ascii"))) def irk(target, message, server = DEFAULT_SERVER): s = connect(server) if "irc:" not in target and "ircs:" not in target: target = "irc://chat.freenode.net/{0}".format(target) if message == '-': for line in fileinput.input('-'): send(s, target, line.rstrip('\n')) else: send(s, target, message) s.close() def main(): target = sys.argv[1] message = " ".join(sys.argv[2:]) # XXX: why is this necessary? #message = message.decode('string_escape') try: irk(target, message) except socket.error as e: sys.stderr.write("irk: write to server failed: %r\n" % e) sys.exit(1) if __name__ == '__main__': main() irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/irk.xml000066400000000000000000000053351267153440000217160ustar00rootroot00000000000000 irk 1 Apr 30 2014 irker irker Commands irk test program for irkerd irk target message text DESCRIPTION irk is a simple test program for irkerd8. It will construct a simple JSON object and pass it to the daemon running on localhost. OPTIONS irk takes the following options: target Which server and channel to join to announced the message. If not prefixed with "irc:", it will prefix "irc://chat.freenode.net/" to the argument before passing it directly to irkerd. This argument is passed as the "to" parameter in the JSON object. message Which message to send to the target specified above. If the string "-", the message will be read from standard input, with newlines stripped. LIMITATIONS irk has no commandline usage and may be riddled with bugs. irk doesn't know how to talk to your favorite VCS. You will generally want to use irkerhook1 instead irk has also all the limitations of irkerd. 
SEE ALSO irkerhook1, AUTHOR Eric S. Raymond esr@snark.thyrsus.com. See the project page at http://www.catb.org/~esr/irker for updates and other resources, including an installable repository hook script. irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/irkerd000077500000000000000000001271551267153440000216220ustar00rootroot00000000000000#!/usr/bin/env python """ irkerd - a simple IRC multiplexer daemon Listens for JSON objects of the form {'to':<irc-url>, 'privmsg':<text>} and relays messages to IRC channels. Each request must be followed by a newline. The <text> must be a string. The value of the 'to' attribute can be a string containing an IRC URL (e.g. 'irc://chat.freenode.net/botwar') or a list of such strings; in the latter case the message is broadcast to all listed channels. Note that the channel portion of the URL need *not* have a leading '#' unless the channel name itself does. Design and code by Eric S. Raymond <esr@snark.thyrsus.com>. See the project resource page at <http://www.catb.org/~esr/irker>. Requires Python 2.7, or: * 2.6 with the argparse package installed. 
* Any 3.x """ # SPDX-License-Identifier: BSD-2-Clause # These things might need tuning HOST = "localhost" PORT = 6659 PROXY_TYPE = None # Use proxy if set 1: SOCKS4, 2: SOCKS5, 3: HTTP PROXY_HOST = "" PROXY_PORT = 1080 XMIT_TTL = (3 * 60 * 60) # Time to live, seconds from last transmit PING_TTL = (15 * 60) # Time to live, seconds from last PING HANDSHAKE_TTL = 60 # Time to live, seconds from nick transmit CHANNEL_TTL = (3 * 60 * 60) # Time to live, seconds from last transmit DISCONNECT_TTL = (24 * 60 * 60) # Time to live, seconds from last connect UNSEEN_TTL = 60 # Time to live, seconds since first request CHANNEL_MAX = 18 # Max channels open per socket (default) ANTI_FLOOD_DELAY = 1.0 # Anti-flood delay after transmissions, seconds ANTI_BUZZ_DELAY = 0.09 # Anti-buzz delay after queue-empty check CONNECTION_MAX = 200 # To avoid hitting a thread limit RECONNECT_DELAY = 3 # Don't spam servers with connection attempts # No user-serviceable parts below this line version = "2.17" import argparse import logging import logging.handlers import json import os import os.path try: # Python 3 import queue except ImportError: # Python 2 import Queue as queue import random import re import select import signal import socket try: import socks socks_on = True except ImportError: socks_on = False try: # Python 3 import socketserver except ImportError: # Python 2 import SocketServer as socketserver import ssl import sys import threading import time import traceback try: # Python 3 import urllib.parse as urllib_parse except ImportError: # Python 2 import urlparse as urllib_parse LOG = logging.getLogger(__name__) LOG.setLevel(logging.ERROR) LOG_LEVELS = ['critical', 'error', 'warning', 'info', 'debug'] try: # Python 2 UNICODE_TYPE = unicode except NameError: # Python 3 UNICODE_TYPE = str # Sketch of implementation: # # One Irker object manages multiple IRC sessions. 
It holds a map of # Dispatcher objects, one per (server, port) combination, which are # responsible for routing messages to one of any number of Connection # objects that do the actual socket conversations. The reason for the # Dispatcher layer is that IRC daemons limit the number of channels a # client (that is, from the daemon's point of view, a socket) can be # joined to, so each session to a server needs a flock of Connection # instances each with its own socket. # # Connections are timed out and removed when either they haven't seen a # PING for a while (indicating that the server may be stalled or down) # or there has been no message traffic to them for a while, or # even if the queue is nonempty but efforts to connect have failed for # a long time. # # There are multiple threads. One accepts incoming traffic from all # servers. Each Connection also has a consumer thread and a # thread-safe message queue. The program main appends messages to # queues as JSON requests are received; the consumer threads try to # ship them to servers. When a socket write stalls, it only blocks an # individual consumer thread; if it stalls long enough, the session # will be timed out. This solves the biggest problem with a # single-threaded implementation, which is that you can't count on a # single stalled write not hanging all other traffic - you're at the # mercy of the length of the buffers in the TCP/IP layer. # # Message delivery is thus not reliable in the face of network stalls, # but this was considered acceptable because IRC (notoriously) has the # same problem - there is little point in reliable delivery to a relay # that is down or unreliable. # # This code uses only NICK, JOIN, PART, MODE, PRIVMSG, USER, and QUIT. # It is strictly compliant to RFC1459, except for the interpretation and # use of the DEAF and CHANLIMIT and (obsolete) MAXCHANNELS features. # # CHANLIMIT is as described in the Internet RFC draft # draft-brocklesby-irc-isupport-03 at . 
# The ",isnick" feature is as described in
# (the reference URL is missing from this copy of the source).

# Historical note: the IRCClient and IRCServerConnection classes
# (~270LOC) replace the overweight, overcomplicated 3KLOC mass of
# irclib code that irker formerly used as a service library.  They
# still look similar to parts of irclib because I contributed to that
# code before giving up on it.

class IRCError(BaseException):
    "An IRC exception"
    # NOTE(review): derives from BaseException, so a plain
    # `except Exception` does not catch it.  Left as-is because code
    # outside this section may depend on that.
    pass


class InvalidRequest(ValueError):
    "An invalid JSON request"
    pass


class IRCClient():
    "An IRC client session to one or more servers."

    def __init__(self):
        # Re-entrant lock guarding server_connections and event_handlers.
        self.mutex = threading.RLock()
        self.server_connections = []
        self.event_handlers = {}
        # Answer server PINGs so the session is not timed out.
        self.add_event_handler("ping",
                               lambda c, e: c.ship("PONG %s" % e.target))

    def newserver(self):
        "Initialize a new server-connection object."
        conn = IRCServerConnection(self)
        with self.mutex:
            self.server_connections.append(conn)
        return conn

    def spin(self, timeout=0.2):
        """Spin processing data from connections forever.

        timeout is both the select() timeout, in seconds, and the sleep
        interval used while no connection has an open socket.
        """
        # Outer loop should specifically *not* be mutex-locked.
        # Otherwise no other thread would ever be able to change
        # the shared state of an IRC object running this function.
        while True:
            nextsleep = 0
            with self.mutex:
                connected = [x for x in self.server_connections
                             if x is not None and x.socket is not None]
                sockets = [x.socket for x in connected]
                if sockets:
                    connmap = dict([(c.socket.fileno(), c) for c in connected])
                    (insocks, _o, _e) = select.select(sockets, [], [], timeout)
                    for s in insocks:
                        try:
                            connmap[s.fileno()].consume()
                        except UnicodeDecodeError as e:
                            # Logger.warn() is a deprecated alias of
                            # warning().
                            LOG.warning('{0}: invalid encoding ({1})'.format(
                                self, e))
                else:
                    nextsleep = timeout
            time.sleep(nextsleep)

    def add_event_handler(self, event, handler):
        "Set a handler to be called later."
        with self.mutex:
            event_handlers = self.event_handlers.setdefault(event, [])
            event_handlers.append(handler)

    def handle_event(self, connection, event):
        """Call every handler registered for "all_events" or event.type.

        Handlers are called as handler(connection, event), "all_events"
        handlers first, each group in registration order.
        """
        with self.mutex:
            h = self.event_handlers
            # This list used to be passed through sorted(); function
            # objects cannot be ordered on Python 3, so that raised
            # TypeError as soon as two handlers applied to one event.
            th = h.get("all_events", []) + h.get(event.type, [])
            for handler in th:
                handler(connection, event)

    def drop_connection(self, connection):
        "Forget a connection; dropping one that is already gone is a no-op."
        with self.mutex:
            if connection in self.server_connections:
                self.server_connections.remove(connection)


class LineBufferedStream():
    "Line-buffer a read stream."
    _crlf_re = re.compile(b'\r?\n')

    def __init__(self):
        self.buffer = b''

    def append(self, newbytes):
        "Add freshly received bytes to the buffer."
        self.buffer += newbytes

    def lines(self):
        "Iterate over lines in the buffer."
        lines = self._crlf_re.split(self.buffer)
        # The last piece is an incomplete line (possibly empty); keep it
        # until its terminator arrives.
        self.buffer = lines.pop()
        return iter(lines)

    def __iter__(self):
        return self.lines()


class IRCServerConnectionError(IRCError):
    pass


class IRCServerConnection():
    "One socket conversation with an IRC server."
    # Optional ":prefix ", then the command, then the argument text.
    # consume() reads the groups by the names "prefix", "command" and
    # "argument", so the groups must be named.
    command_re = re.compile("^(:(?P<prefix>[^ ]+) +)?(?P<command>[^ ]+)( *(?P<argument> .+))?")
    # The full list of numeric-to-event mappings is in Perl's Net::IRC.
    # We only need to ensure that if some ancient server throws numerics
    # for the ones we actually want to catch, they're mapped.
    codemap = {
        "001": "welcome",
        "005": "featurelist",
        "432": "erroneusnickname",
        "433": "nicknameinuse",
        "436": "nickcollision",
        "437": "unavailresource",
    }

    def __init__(self, master):
        self.master = master
        self.socket = None

    def _wrap_socket(self, socket, target, certfile=None, cafile=None,
                     protocol=ssl.PROTOCOL_TLSv1):
        """Wrap `socket` for SSL/TLS, store it in self.socket, return it.

        The server certificate is always required (CERT_REQUIRED).
        certfile, if given, is loaded as the client certificate chain;
        cafile, if given, supplies the CA certificates, otherwise the
        default verify paths are used.
        NOTE(review): the default protocol pins TLS 1.0; consider a
        negotiating default after checking the callers.
        """
        try:  # Python 3.2 and greater
            ssl_context = ssl.SSLContext(protocol)
        except AttributeError:  # Python < 3.2
            self.socket = ssl.wrap_socket(
                socket, certfile=certfile, cert_reqs=ssl.CERT_REQUIRED,
                ssl_version=protocol, ca_certs=cafile)
        else:
            ssl_context.verify_mode = ssl.CERT_REQUIRED
            if certfile:
                ssl_context.load_cert_chain(certfile)
            if cafile:
                ssl_context.load_verify_locations(cafile=cafile)
            else:
                ssl_context.set_default_verify_paths()
            kwargs = {}
            if ssl.HAS_SNI:
                kwargs['server_hostname'] = target.servername
            self.socket = ssl_context.wrap_socket(socket, **kwargs)
        return self.socket

    def _check_hostname(self, target):
        """Check the peer certificate against target.servername.

        Raises IRCServerConnectionError on a mismatch.  When the ssl
        module has no match_hostname() only a warning is logged.
        """
        if hasattr(ssl, 'match_hostname'):  # Python >= 3.2
            cert = self.socket.getpeercert()
            try:
                ssl.match_hostname(cert, target.servername)
            except ssl.CertificateError as e:
                raise IRCServerConnectionError(
                    'Invalid SSL/TLS certificate: %s' % e)
        else:  # Python < 3.2
            LOG.warning(
                'cannot check SSL/TLS hostname with Python %s' % sys.version)

    def connect(self, target, nickname, username=None, realname=None,
                **kwargs):
        """Connect to target's server and register with NICK and USER.

        target supplies servername, port, ssl, username and password.
        Extra keyword arguments are passed to _wrap_socket() for SSL/TLS
        targets.  Returns self; raises IRCServerConnectionError when the
        socket cannot be connected or the certificate does not match.
        """
        LOG.debug("connect(server=%r, port=%r, nickname=%r, ...)" % (
            target.servername, target.port, nickname))
        if self.socket is not None:
            self.disconnect("Changing servers")

        self.buffer = LineBufferedStream()
        self.event_handlers = {}
        self.real_server_name = ""
        self.target = target
        self.nickname = nickname
        try:
            if socks_on and PROXY_TYPE:
                self.socket = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
                self.socket.set_proxy(PROXY_TYPE, PROXY_HOST, PROXY_PORT)
            else:
                self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            if target.ssl:
                self.socket = self._wrap_socket(
                    socket=self.socket, target=target, **kwargs)
            self.socket.bind(('', 0))
            self.socket.connect((target.servername, target.port))
        except socket.error as err:
            raise IRCServerConnectionError("Couldn't connect to socket: %s" % err)

        if target.ssl:
            self._check_hostname(target=target)
        if target.password:
            self.ship("PASS " + target.password)
        self.nick(self.nickname)
        self.user(
            username=target.username or username or 'irker',
            realname=realname or 'irker relaying client')
        return self

    def close(self):
        "Disconnect and remove this connection from its IRCClient."
        # Without this thread lock, there is a window during which
        # select() can find a closed socket, leading to an EBADF error.
        with self.master.mutex:
            self.disconnect("Closing object")
            self.master.drop_connection(self)

    def consume(self):
        """Read pending data from the socket and dispatch one event per line.

        Every complete line produces an "every_raw_message" event and
        then an event named after the (lower-cased, numeric-mapped)
        command.  A read error or an empty read disconnects.
        """
        try:
            incoming = self.socket.recv(16384)
        except socket.error:
            # Server hung up on us.
            self.disconnect("Connection reset by peer")
            return
        if not incoming:
            # Dead air also indicates a connection reset.
            self.disconnect("Connection reset by peer")
            return

        self.buffer.append(incoming)

        for line in self.buffer:
            if not isinstance(line, UNICODE_TYPE):
                line = UNICODE_TYPE(line, 'utf-8')
            LOG.debug("FROM: %s" % line)

            if not line:
                continue

            prefix = None
            command = None
            # An empty list rather than None, so a line without
            # arguments cannot crash the receive loop below.
            arguments = []
            self.handle_event(Event("every_raw_message",
                                    self.real_server_name,
                                    None,
                                    [line]))

            m = IRCServerConnection.command_re.match(line)
            if m is None:
                # Not parsable as an IRC message (e.g. it starts with a
                # space); nothing further to dispatch.
                continue
            if m.group("prefix"):
                prefix = m.group("prefix")
                if not self.real_server_name:
                    self.real_server_name = prefix
            if m.group("command"):
                command = m.group("command").lower()
            if m.group("argument"):
                # Everything after " :" is one trailing argument that
                # may itself contain spaces.
                a = m.group("argument").split(" :", 1)
                arguments = a[0].split()
                if len(a) == 2:
                    arguments.append(a[1])

            command = IRCServerConnection.codemap.get(command, command)
            if command in ["privmsg", "notice"]:
                target = arguments.pop(0) if arguments else None
            elif command == "quit":
                target = None
                arguments = arguments[:1]
            elif command == "ping":
                target = arguments[0] if arguments else None
            else:
                target = arguments[0] if arguments else None
                arguments = arguments[1:]

            LOG.debug("command: %s, source: %s, target: %s, arguments: %s" % (
                command, prefix, target, arguments))
            self.handle_event(Event(command, prefix, target, arguments))

    def handle_event(self, event):
        "Dispatch to the IRCClient's handlers, then to this connection's own."
        self.master.handle_event(self, event)
        if event.type in self.event_handlers:
            for fn in self.event_handlers[event.type]:
                fn(self, event)

    def is_connected(self):
        return self.socket is not None

    def disconnect(self, message=""):
        "Close the socket, if any, and emit a 'disconnect' event."
        if self.socket is None:
            return
        # Don't send a QUIT here - causes infinite loop!
        try:
            self.socket.shutdown(socket.SHUT_WR)
        except socket.error:
            pass
        # Close separately: shutdown() fails on a socket that never
        # connected or is already dead, and the descriptor must still
        # be released in that case.
        try:
            self.socket.close()
        except socket.error:
            pass
        self.socket = None
        # NOTE(review): target.server is passed through without being
        # called; confirm whether the target class defines it as an
        # attribute or as a method.
        self.handle_event(
            Event("disconnect", self.target.server, "", [message]))

    def join(self, channel, key=""):
        self.ship("JOIN %s%s" % (channel, (key and (" " + key))))

    def mode(self, target, command):
        self.ship("MODE %s %s" % (target, command))

    def nick(self, newnick):
        self.ship("NICK " + newnick)

    def part(self, channel, message=""):
        cmd_parts = ['PART', channel]
        if message:
            cmd_parts.append(message)
        self.ship(' '.join(cmd_parts))

    def privmsg(self, target, text):
        self.ship("PRIVMSG %s :%s" % (target, text))

    def quit(self, message=""):
        self.ship("QUIT" + (message and (" :" + message)))

    def user(self, username, realname):
        self.ship("USER %s 0 * :%s" % (username, realname))

    def ship(self, string):
        "Ship a command to the server, appending CR/LF"
        try:
            # sendall() keeps writing until the whole command is out;
            # send() may transmit only part of it.
            self.socket.sendall(string.encode('utf-8') + b'\r\n')
            LOG.debug("TO: %s" % string)
        except socket.error:
            self.disconnect("Connection reset by peer.")


class Event(object):
    "An IRC event: its type, source, target and list of arguments."
    def __init__(self, evtype, source, target, arguments=None):
        self.type = evtype
        self.source = source
        self.target = target
        if arguments is None:
            arguments = []
        self.arguments = arguments


def is_channel(string):
    "Return a true value if string starts with a channel prefix (#, &, + or !)."
    return string and string[0] in "#&+!"
class Connection:
    "One IRC session to a target server, fed by a queue and a consumer thread."
    def __init__(self, irker, target, nick_template,
                 nick_needs_number=False, password=None, **kwargs):
        self.irker = irker
        self.target = target
        self.nick_template = nick_template
        self.nick_needs_number = nick_needs_number
        self.password = password
        self.kwargs = kwargs
        self.nick_trial = None
        self.connection = None
        self.status = None
        self.last_xmit = time.time()
        self.last_ping = time.time()
        self.channels_joined = {}
        self.channel_limits = {}
        # The consumer thread
        self.queue = queue.Queue()
        self.thread = None

    def nickname(self, n=None):
        "Return a name for the nth server connection."
        if n is None:
            n = self.nick_trial
        if self.nick_needs_number:
            return self.nick_template % n
        else:
            return self.nick_template

    def handle_ping(self):
        "Register the fact that the server has pinged this connection."
        self.last_ping = time.time()

    def handle_welcome(self):
        "The server says we're OK, with a non-conflicting nick."
        self.status = "ready"
        LOG.info("nick %s accepted" % self.nickname())
        if self.password:
            self.connection.privmsg("nickserv", "identify %s" % self.password)

    def handle_badnick(self):
        "The server says our nick is ill-formed or has a conflict."
        LOG.info("nick %s rejected" % self.nickname())
        if self.nick_needs_number:
            # Randomness prevents a malicious user or bot from
            # anticipating the next trial name in order to block us
            # from completing the handshake.
            self.nick_trial += random.randint(1, 3)
            self.last_xmit = time.time()
            self.connection.nick(self.nickname())
        # Otherwise fall through, it might be possible to
        # recover manually.

    def handle_disconnect(self):
        "Server disconnected us for flooding or some other reason."
        self.connection = None
        if self.status != "expired":
            self.status = "disconnected"
            # Avoid flooding the server if it disconnects
            # immediately on successful login.
            time.sleep(RECONNECT_DELAY)

    def handle_kick(self, outof):
        "We've been kicked."
        self.status = "handshaking"
        try:
            del self.channels_joined[outof]
        except KeyError:
            LOG.error("irkerd: kicked by %s from %s that's not joined" % (
                self.target, outof))
        # Drain the queue, discarding traffic for the channel we were
        # kicked from, then put the remainder back in order.
        qcopy = []
        while not self.queue.empty():
            (channel, message, key) = self.queue.get()
            if channel != outof:
                qcopy.append((channel, message, key))
        for (channel, message, key) in qcopy:
            self.queue.put((channel, message, key))
        self.status = "ready"

    def enqueue(self, channel, message, key, quit_after=False):
        "Enqueue a message for transmission."
        if self.thread is None or not self.thread.is_alive():
            self.status = "unseen"
            self.thread = threading.Thread(target=self.dequeue)
            self.thread.setDaemon(True)
            self.thread.start()
        self.queue.put((channel, message, key))
        if quit_after:
            # A None message is the request to quit; see dequeue().
            self.queue.put((channel, None, key))

    def dequeue(self):
        "Try to ship pending messages from the queue."
        try:
            while True:
                # We want to be kind to the IRC servers and not hold unused
                # sockets open forever, so they have a time-to-live.  The
                # loop is coded this particular way so that we can drop
                # the actual server connection when its time-to-live
                # expires, then reconnect and resume transmission if the
                # queue fills up again.
                if self.queue.empty():
                    # Queue is empty, at some point we want to time out
                    # the connection rather than holding a socket open in
                    # the server forever.
                    now = time.time()
                    xmit_timeout = now > self.last_xmit + XMIT_TTL
                    ping_timeout = now > self.last_ping + PING_TTL
                    if self.status == "disconnected":
                        # If the queue is empty, we can drop this connection.
                        self.status = "expired"
                        break
                    elif xmit_timeout or ping_timeout:
                        LOG.info((
                            "timing out connection to %s at %s "
                            "(ping_timeout=%s, xmit_timeout=%s)") % (
                            self.target, time.asctime(),
                            ping_timeout, xmit_timeout))
                        with self.irker.irc.mutex:
                            self.connection.context = None
                            self.connection.quit("transmission timeout")
                            self.connection = None
                        self.status = "disconnected"
                    else:
                        # Prevent this thread from hogging the CPU by pausing
                        # for just a little bit after the queue-empty check.
                        # As long as this is less than the duration of a human
                        # reflex arc it is highly unlikely any human will ever
                        # notice.
                        time.sleep(ANTI_BUZZ_DELAY)
                elif self.status == "disconnected" \
                        and time.time() > self.last_xmit + DISCONNECT_TTL:
                    # Queue is nonempty, but the IRC server might be
                    # down. Letting failed connections retain queue
                    # space forever would be a memory leak.
                    self.status = "expired"
                    break
                elif not self.connection and self.status != "expired":
                    # Queue is nonempty but server isn't connected.
                    with self.irker.irc.mutex:
                        self.connection = self.irker.irc.newserver()
                        self.connection.context = self
                    # Try to avoid colliding with other instances
                    self.nick_trial = random.randint(1, 990)
                    self.channels_joined = {}
                    try:
                        # This will throw
                        # IRCServerConnectionError on failure
                        self.connection.connect(
                            target=self.target,
                            nickname=self.nickname(),
                            **self.kwargs)
                        self.status = "handshaking"
                        LOG.info("XMIT_TTL bump (%s connection) at %s" % (
                            self.target, time.asctime()))
                        self.last_xmit = time.time()
                        self.last_ping = time.time()
                    except IRCServerConnectionError as e:
                        LOG.error("irkerd: %s" % e)
                        self.status = "expired"
                        break
                elif self.status == "handshaking":
                    if time.time() > self.last_xmit + HANDSHAKE_TTL:
                        self.status = "expired"
                        break
                    else:
                        # Don't buzz on the empty-queue test while we're
                        # handshaking
                        time.sleep(ANTI_BUZZ_DELAY)
                elif self.status == "unseen" \
                        and time.time() > self.last_xmit + UNSEEN_TTL:
                    # Nasty people could attempt a denial-of-service
                    # attack by flooding us with requests with invalid
                    # servernames. We guard against this by rapidly
                    # expiring connections that have a nonempty queue but
                    # have never had a successful open.
                    self.status = "expired"
                    break
                elif self.status == "ready":
                    (channel, message, key) = self.queue.get()
                    if channel not in self.channels_joined:
                        self.connection.join(channel, key=key)
                        LOG.info("joining %s on %s." % (channel, self.target))
                    # None is magic - it's a request to quit the server
                    if message is None:
                        self.connection.quit()
                    # An empty message might be used as a keepalive or
                    # to join a channel for logging, so suppress the
                    # privmsg send unless there is actual traffic.
                    elif message:
                        for segment in message.split("\n"):
                            # Truncate the message if it's too long,
                            # but we're working with characters here,
                            # not bytes, so we could be off.
                            # 500 = 512 - CRLF - 'PRIVMSG ' - ' :'
                            maxlength = 500 - len(channel)
                            if len(segment) > maxlength:
                                segment = segment[:maxlength]
                            try:
                                self.connection.privmsg(channel, segment)
                            except ValueError as err:
                                LOG.warning((
                                    "rejected a message to %s on %s "
                                    "because: %s") % (
                                    channel, self.target, UNICODE_TYPE(err)))
                                LOG.debug(traceback.format_exc())
                            time.sleep(ANTI_FLOOD_DELAY)
                    self.last_xmit = self.channels_joined[channel] = time.time()
                    LOG.info("XMIT_TTL bump (%s transmission) at %s" % (
                        self.target, time.asctime()))
                    self.queue.task_done()
                elif self.status == "expired":
                    LOG.error(
                        "irkerd: we're expired but still running! This is a bug.")
                    break
        except Exception as e:
            LOG.error("irkerd: exception %s in thread for %s" %
                      (e, self.target))
            # Maybe this should have its own status?
            self.status = "expired"
            LOG.debug(traceback.format_exc())
        finally:
            # Make sure we don't leave any zombies behind.  The connection
            # is None after a timeout or a server-side disconnect, and in
            # that case there is nothing left to close.
            if self.connection is not None:
                self.connection.close()

    def live(self):
        "Should this connection not be scavenged?"
        return self.status != "expired"

    def joined_to(self, channel):
        "Is this connection joined to the specified channel?"
        return channel in self.channels_joined

    def accepting(self, channel):
        "Can this connection accept a join of this channel?"
        if self.channel_limits:
            match_count = 0
            for already in self.channels_joined:
                # This obscure code is because the RFCs allow separate limits
                # by channel type (indicated by the first character of the name)
                # a feature that is almost never actually used.
                if already[0] == channel[0]:
                    match_count += 1
            return match_count < self.channel_limits.get(channel[0], CHANNEL_MAX)
        else:
            return len(self.channels_joined) < CHANNEL_MAX


class Target():
    "Represent a transmission target."
    def __init__(self, url):
        self.url = url
        parsed = urllib_parse.urlparse(url)
        self.ssl = parsed.scheme == 'ircs'
        if self.ssl:
            default_ircport = 6697
        else:
            default_ircport = 6667
        self.username = parsed.username
        self.password = parsed.password
        self.servername = parsed.hostname
        self.port = parsed.port or default_ircport
        # IRC channel names are case-insensitive.  If we don't smash
        # case here we may run into problems later. There was a bug
        # observed on irc.rizon.net where an irkerd user specified #Channel,
        # got kicked, and irkerd crashed because the server returned
        # "#channel" in the notification that our kick handler saw.
        self.channel = parsed.path.lstrip('/').lower()
        # This deals with a tweak in recent versions of urlparse.
        if parsed.fragment:
            self.channel += "#" + parsed.fragment
        isnick = self.channel.endswith(",isnick")
        if isnick:
            # Strip the 7-character ",isnick" suffix.
            self.channel = self.channel[:-7]
        if self.channel and not isnick and self.channel[0] not in "#&+":
            self.channel = "#" + self.channel
        # support both channel?secret and channel?key=secret
        self.key = ""
        if parsed.query:
            self.key = re.sub("^key=", "", parsed.query)

    def __str__(self):
        "Represent this instance as a string"
        return self.servername or self.url or repr(self)

    def validate(self):
        "Raise InvalidRequest if the URL is missing a critical component"
        if not self.servername:
            raise InvalidRequest(
                'target URL missing a servername: %r' % self.url)
        if not self.channel:
            raise InvalidRequest(
                'target URL missing a channel: %r' % self.url)

    def server(self):
        "Return a hashable tuple representing the destination server."
        return (self.servername, self.port)


class Dispatcher:
    "Manage connections to a particular server-port combination."
    def __init__(self, irker, **kwargs):
        self.irker = irker
        self.kwargs = kwargs
        self.connections = []

    def dispatch(self, channel, message, key, quit_after=False):
        "Dispatch messages for our server-port combination."
        # First, check if there is room for another channel
        # on any of our existing connections.
        connections = [x for x in self.connections if x.live()]
        eligibles = [x for x in connections if x.joined_to(channel)] \
                    or [x for x in connections if x.accepting(channel)]
        if eligibles:
            eligibles[0].enqueue(channel, message, key, quit_after)
            return
        # All connections are full up. Look for one old enough to be
        # scavenged.
        ancients = []
        for connection in connections:
            # channels_joined belongs to each Connection, not to the list.
            for (chan, age) in connection.channels_joined.items():
                if age < time.time() - CHANNEL_TTL:
                    ancients.append((connection, chan, age))
        if ancients:
            ancients.sort(key=lambda x: x[2])
            (found_connection, drop_channel, _drop_age) = ancients[0]
            # PART is sent by the IRC-level connection, which is None
            # while the session is not currently connected.
            irc_connection = found_connection.connection
            if irc_connection is not None:
                irc_connection.part(drop_channel, "scavenged by irkerd")
            del found_connection.channels_joined[drop_channel]
            #time.sleep(ANTI_FLOOD_DELAY)
            found_connection.enqueue(channel, message, key, quit_after)
            return
        # All existing channels had recent activity
        newconn = Connection(self.irker, **self.kwargs)
        self.connections.append(newconn)
        newconn.enqueue(channel, message, key, quit_after)

    def live(self):
        "Does this server-port combination have any live connections?"
        self.connections = [x for x in self.connections if x.live()]
        return len(self.connections) > 0

    def pending(self):
        "Return all connections with pending traffic."
        return [x for x in self.connections if not x.queue.empty()]

    def last_xmit(self):
        "Return the time of the most recent transmission."
        return max(x.last_xmit for x in self.connections)


class Irker:
    "Persistent IRC multiplexer."
    def __init__(self, logfile=None, **kwargs):
        self.logfile = logfile
        self.kwargs = kwargs
        self.irc = IRCClient()
        self.irc.add_event_handler("ping", self._handle_ping)
        self.irc.add_event_handler("welcome", self._handle_welcome)
        self.irc.add_event_handler("erroneusnickname", self._handle_badnick)
        self.irc.add_event_handler("nicknameinuse", self._handle_badnick)
        self.irc.add_event_handler("nickcollision", self._handle_badnick)
        self.irc.add_event_handler("unavailresource", self._handle_badnick)
        self.irc.add_event_handler("featurelist", self._handle_features)
        self.irc.add_event_handler("disconnect", self._handle_disconnect)
        self.irc.add_event_handler("kick", self._handle_kick)
        self.irc.add_event_handler("every_raw_message", self._handle_every_raw_message)
        self.servers = {}

    def thread_launch(self):
        "Run the IRC client's spin() loop in a daemon thread."
        thread = threading.Thread(target=self.irc.spin)
        thread.setDaemon(True)
        self.irc._thread = thread
        thread.start()

    def _handle_ping(self, connection, _event):
        "PING arrived, bump the last-received time for the connection."
        if connection.context:
            connection.context.handle_ping()

    def _handle_welcome(self, connection, _event):
        "Welcome arrived, nick accepted for this connection."
        if connection.context:
            connection.context.handle_welcome()

    def _handle_badnick(self, connection, _event):
        "Nick not accepted for this connection."
        if connection.context:
            connection.context.handle_badnick()

    def _handle_features(self, connection, event):
        "Determine if and how we can set deaf mode."
        if connection.context:
            cxt = connection.context
            arguments = event.arguments
            for lump in arguments:
                if lump.startswith("DEAF="):
                    # Deaf mode is skipped when a traffic log is requested.
                    if not self.logfile:
                        connection.mode(cxt.nickname(), "+"+lump[5:])
                elif lump.startswith("MAXCHANNELS="):
                    m = int(lump[12:])
                    for pref in "#&+":
                        cxt.channel_limits[pref] = m
                    LOG.info("%s maxchannels is %d" % (connection.target, m))
                elif lump.startswith("CHANLIMIT=#:"):
                    limits = lump[10:].split(",")
                    try:
                        for token in limits:
                            (prefixes, limit) = token.split(":")
                            limit = int(limit)
                            for c in prefixes:
                                cxt.channel_limits[c] = limit
                        LOG.info("%s channel limit map is %s" % (
                            connection.target, cxt.channel_limits))
                    except ValueError:
                        LOG.error("irkerd: ill-formed CHANLIMIT property")

    def _handle_disconnect(self, connection, _event):
        "Server hung up the connection."
        LOG.info("server %s disconnected" % connection.target)
        connection.close()
        if connection.context:
            connection.context.handle_disconnect()

    def _handle_kick(self, connection, event):
        "We were kicked from a channel; tell the owning Connection."
        target = event.target
        LOG.info("irker has been kicked from %s on %s" % (
            target, connection.target))
        if connection.context:
            connection.context.handle_kick(target)

    def _handle_every_raw_message(self, _connection, event):
        "Log all messages when in watcher mode."
        if self.logfile:
            with open(self.logfile, "ab") as logfp:
                message = u"%03f|%s|%s\n" % \
                     (time.time(), event.source, event.arguments[0])
                logfp.write(message.encode('utf-8'))

    def pending(self):
        "Do we have any pending message traffic?"
        return [k for (k, v) in self.servers.items() if v.pending()]

    def _parse_request(self, line):
        "Request-parsing helper for the handle() method"
        request = json.loads(line.strip())
        if not isinstance(request, dict):
            raise InvalidRequest(
                "request is not a JSON dictionary: %r" % request)
        if "to" not in request or "privmsg" not in request:
            raise InvalidRequest(
                "malformed request - 'to' or 'privmsg' missing: %r" % request)
        channels = request['to']
        message = request['privmsg']
        if not isinstance(channels, (list, UNICODE_TYPE)):
            raise InvalidRequest(
                "malformed request - unexpected channel type: %r" % channels)
        if not isinstance(message, UNICODE_TYPE):
            raise InvalidRequest(
                "malformed request - unexpected message type: %r" % message)
        if not isinstance(channels, list):
            channels = [channels]
        targets = []
        for url in channels:
            # A bad URL is logged and skipped; the remaining targets
            # are still returned.
            try:
                if not isinstance(url, UNICODE_TYPE):
                    raise InvalidRequest(
                        "malformed request - URL has unexpected type: %r" %
                        url)
                target = Target(url)
                target.validate()
            except InvalidRequest as e:
                LOG.error("irkerd: " + UNICODE_TYPE(e))
            else:
                targets.append(target)
        return (targets, message)

    def handle(self, line, quit_after=False):
        "Perform a JSON relay request."
        try:
            targets, message = self._parse_request(line=line)
            for target in targets:
                if target.server() not in self.servers:
                    self.servers[target.server()] = Dispatcher(
                        self, target=target, **self.kwargs)
                self.servers[target.server()].dispatch(
                    target.channel, message, target.key, quit_after=quit_after)
                # GC dispatchers with no active connections.  Iterate over
                # a snapshot of the keys: deleting from a dict while
                # walking its live key view raises RuntimeError on
                # Python 3, which the handler below would misreport as
                # malformed JSON.
                for servername in list(self.servers.keys()):
                    if not self.servers[servername].live():
                        del self.servers[servername]
                # If we might be pushing a resource limit even
                # after garbage collection, remove a session.  The
                # goal here is to head off DoS attacks that aim at
                # exhausting thread space or file descriptors.
                # The cost is that attempts to DoS this service
                # will cause lots of join/leave spam as we
                # scavenge old channels after connecting to new
                # ones. The particular method used for selecting a
                # session to be terminated doesn't matter much; we
                # choose the one longest idle on the assumption
                # that message activity is likely to be clumpy.
                if len(self.servers) >= CONNECTION_MAX:
                    oldest = min(
                        self.servers.keys(),
                        key=lambda name: self.servers[name].last_xmit())
                    del self.servers[oldest]
        except InvalidRequest as e:
            LOG.error("irkerd: " + UNICODE_TYPE(e))
        except ValueError:
            LOG.error("irkerd: " + "can't recognize JSON on input: %r" % line)
        except RuntimeError:
            LOG.error("irkerd: " + "wildly malformed JSON blew the parser stack.")


class IrkerTCPHandler(socketserver.StreamRequestHandler):
    "Feed each line received on a TCP connection to the global irker."
    def handle(self):
        while True:
            line = self.rfile.readline()
            if not line:
                break
            if not isinstance(line, UNICODE_TYPE):
                line = UNICODE_TYPE(line, 'utf-8')
            irker.handle(line=line.strip())


class IrkerUDPHandler(socketserver.BaseRequestHandler):
    "Feed the payload of one UDP datagram to the global irker."
    def handle(self):
        line = self.request[0].strip()
        #socket = self.request[1]
        if not isinstance(line, UNICODE_TYPE):
            line = UNICODE_TYPE(line, 'utf-8')
        irker.handle(line=line.strip())


def in_background(): "Is this process running in background?"
try: return os.getpgrp() != os.tcgetpgrp(1) except OSError: return True if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__.strip().splitlines()[0]) parser.add_argument( '-c', '--ca-file', metavar='PATH', help='file of trusted certificates for SSL/TLS') parser.add_argument( '-e', '--cert-file', metavar='PATH', help='pem file used to authenticate to the server') parser.add_argument( '-d', '--log-level', metavar='LEVEL', choices=LOG_LEVELS, help='how much to log to the log file (one of %(choices)s)') parser.add_argument( '-H', '--host', metavar='ADDRESS', default=HOST, help='IP address to listen on') parser.add_argument( '-l', '--log-file', metavar='PATH', help='file for saving captured message traffic') parser.add_argument( '-n', '--nick', metavar='NAME', default='irker%03d', help="nickname (optionally with a '%%.*d' server connection marker)") parser.add_argument( '-p', '--password', metavar='PASSWORD', help='NickServ password') parser.add_argument( '-i', '--immediate', metavar='IRC-URL', help=( 'send a single message to IRC-URL and exit. The message is the ' 'first positional argument.')) parser.add_argument( '-V', '--version', action='version', version='%(prog)s {0}'.format(version)) parser.add_argument( 'message', metavar='MESSAGE', nargs='?', help='message for --immediate mode') args = parser.parse_args() if not args.log_file and in_background(): # The Linux, Mac, and FreeBSD values of the logging device. logdev = [x for x in ('/dev/log', '/var/run/syslog', '/var/run/log') if os.path.exists(x) and not os.path.isdir(x)] if len(logdev) != 1: sys.stderr.write("can't initialize log device, bailing out!\n") raise SystemExit(1) # There's a case for falling back to address = ('localhost', 514) # But some systems (including OS X) disable this for security reasons. 
handler = logging.handlers.SysLogHandler(address=logdev[0], facility='daemon') else: handler = logging.StreamHandler() LOG.addHandler(handler) if args.log_level: log_level = getattr(logging, args.log_level.upper()) LOG.setLevel(log_level) irker = Irker( logfile=args.log_file, nick_template=args.nick, nick_needs_number=re.search('%.*d', args.nick), password=args.password, cafile=args.ca_file, certfile=args.cert_file, ) LOG.info("irkerd version %s" % version) if args.immediate: if not args.message: # We want newline to become '\n' and tab to become '\t'; # the JSON decoder will undo these transformations. # This will also encode backslash, backspace, formfeed, # and high-half characters, which might produce unexpected # results on output. args.message = sys.stdin.read().encode("string_escape") irker.irc.add_event_handler("quit", lambda _c, _e: sys.exit(0)) irker.handle('{"to":"%s","privmsg":"%s"}' % ( args.immediate, args.message), quit_after=True) irker.irc.spin() else: if args.message: LOG.error( 'irkerd: message argument given (%r), but --immediate not set' % ( args.message)) raise SystemExit(1) irker.thread_launch() try: tcpserver = socketserver.TCPServer((args.host, PORT), IrkerTCPHandler) udpserver = socketserver.UDPServer((args.host, PORT), IrkerUDPHandler) for server in [tcpserver, udpserver]: server = threading.Thread(target=server.serve_forever) server.setDaemon(True) server.start() try: signal.pause() except KeyboardInterrupt: raise SystemExit(1) except socket.error as e: LOG.error("irkerd: server launch failed: %r\n" % e) # end irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/irkerd.service000066400000000000000000000003571267153440000232500ustar00rootroot00000000000000# Copyright 2012 Wulf C. 
Krueger # Distributed under the terms of the BSD LICENSE [Unit] Description=irker daemon Requires=network.target [Service] ExecStart=/usr/bin/irkerd [Install] WantedBy=multi-user.target irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/irkerd.xml000066400000000000000000000250571267153440000224140ustar00rootroot00000000000000 irkerd 8 Aug 27 2012 irker irker Commands irkerd relay for shipping notifications to IRC servers irkerd -c ca-file -d debuglevel -e cert-file -l logfile -H host -n nick -p password -i IRC-URL -V -h message text DESCRIPTION irkerd is a specialized write-only IRC client intended to be used for shipping notification messages to IRC channels. The use case in mind when it was designed was broadcasting notifications from commit hooks in version-control systems. The main advantage of relaying through this daemon over individual scripted sends from applications is that it can maintain connection state for multiple channels, rather than producing obnoxious join/leave channel spam on every message. irkerd is a socket server that listens on for UDP or TCP packets on port 6659 for textual request lines containing JSON objects and terminated by a newline. Each JSON object must have two members: "to" specifying a destination or destination list, and "privmsg" specifying the message text. 
Examples: {"to":"irc://chat.freenode.net/git-ciabot", "privmsg":"Hello, world!"} {"to":["irc://chat.freenode.net/#git-ciabot","irc://chat.freenode.net/#gpsd"],"privmsg":"Multichannel test"} {"to":"irc://chat.hypothetical.net:6668/git-ciabot", "privmsg":"Hello, world!"} {"to":"ircs://chat.hypothetical.net/git-private?key=topsecret", "privmsg":"Keyed channel test"} {"to":"ircs://:topsecret@chat.example.net/git-private", "privmsg":"Password-protected server test"} If the channel part of the URL does not have one of the prefix characters #, &, or +, a # will be prepended to it before shipping - unless the channel part has the suffix ",isnick" (which is unconditionally removed). The host part of the URL may have a port-number suffix separated by a colon, as shown in the third example; otherwise irkerd sends plaintext messages to the default 6667 IRC port of each server, and SSL/TLS messages to 6697. The password for password-protected servers can be set using the usual [{username}:{password}@]{host}:{port} syntax defined in RFC 3986, as shown in the fifth example. Non-empty URL usernames override the default irker username. When the to URL uses the ircs scheme (as shown in the fourth and fifth examples), the connection to the IRC server is made via SSL/TLS (vs. a plaintext connection with the irc scheme). To connect via SSL/TLS with Python 2.x, you need to explicitly declare the certificate authority file used to verify server certificates. For example, -c /etc/ssl/certs/ca-certificates.crt. In Python 3.2 and later, you can still set this option to declare a custom CA file, but if you don't set it, irkerd will use OpenSSL's default file (using Python's ssl.SSLContext.set_default_verify_paths). In Python 3.2 and later, ssl.match_hostname is used to ensure the server certificate belongs to the intended host, as well as being signed by a trusted CA. 
To join password-protected (mode +k) channels, the channel part of the URL may be followed with a query-string indicating the channel key, of the form ?secret or ?key=secret, where secret is the channel key. An empty message is legal and will cause irkerd to join or maintain a connection to the target channels without actually emitting a message. This may be useful for advertising that an instance is up and running, or for joining a channel to log its traffic. OPTIONS irkerd takes the following options: -d Takes a following value, setting the debugging level from it; possible values are 'critical', 'error', 'warning', 'info', 'debug'. This option will generally only be of interest to developers, as the logs are designed to help trace irkerd's internal state. These tracing logs are independent of the traffic logs controlled by -l. Logging will be to standard error (if irkerd is running in the foreground) or to /dev/syslog with facility "daemon" (if irkerd is running in the background). The background-ness of irkerd is determined by comparing the process group id with the process group associated with the terminal attached to stdout (with non-matches for background processes). We assume you aren't running irkerd in Windows or another OS that doesn't support os.getpgrp or tcgetpgrp. We assume that if stdout is attached to a TTY associated with the same process group as irkerd, you do intend to log to stderr and not syslog. -e Takes a following filename in pem format and uses it to authenticate to the IRC server. You must be connecting to the IRC server over SSL for this to function properly. This is commonly known as CertFP. -e Takes a following filename in pem format and uses it to authenticate to the IRC server. You must be connecting to the IRC server over SSL for this to function properly. This is commonly known as CertFP. -l Takes a following filename, logs traffic to that file. 
Each log line consists of three |-separated fields: a numeric timestamp in Unix time, the FQDN of the sending server, and the message data. -H Takes a following hostname, and binds to that address when listening for messages. irkerd binds to localhost by default, but you may want to use your host's public address to listen on a local network. Listening on a public interface is not recommended, as it makes spamming IRC channels very easy. -n Takes a following value, setting the nick to be used. If the nick contains a numeric format element (such as %03d) it is used to generate suffixed fallback names in the event of a nick collision. -p Takes a following value, setting a nickserv password to be used. If given, this password is shipped to authenticate the nick on receipt of a welcome message. -i Immediate mode, to be run in foreground. Takes a following value interpreted as a channel URL. May take a second argument giving a message string; if the second argument is absent the message is read from standard input (and may contain newlines). Sends the message, then quits. -V Write the program version to stdout and terminate. -h Print usage instructions and terminate. LIMITATIONS Sending requests via UDP optimizes for lowest latency and network load by avoiding TCP connection setup time; the cost is that delivery is not reliable in the face of packet loss. An irkerd instance with a publicly-accessible request socket could complicate blocking of IRC spam by making it easy for spammers to submit while hiding their IP addresses; the better way to deploy, then, is on places like project-hosting sites where the irkerd socket can be visible from commit-hook code but not exposed to the outside world. Priming your firewall with blocklists of IP addresses known to spew spam is always a good idea. The absence of any option to set the service port is deliberate. If you think you need to do that, you have a problem better solved at your firewall. 
IRC has a message length limit of 510 bytes; generate your privmsg attribute values with appropriate care. IRC ignores any text after an embedded newline. Be aware that irkerd will turn payload strings with embedded newlines into multiple IRC sends to avoid having message data discarded. Due to a bug in Python URL parsing, IRC urls with both a # and a key part may fail unexpectedly. The workaround is to remove the #. SEE ALSO irkerhook1, AUTHOR Eric S. Raymond esr@snark.thyrsus.com. See the project page at http://www.catb.org/~esr/irker for updates and other resources, including an installable repository hook script. irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/irkerhook.py000077500000000000000000000556511267153440000227670ustar00rootroot00000000000000#!/usr/bin/env python # Copyright (c) 2012 Eric S. Raymond # Distributed under BSD terms. # # This script contains git porcelain and porcelain byproducts. # Requires Python 2.6, or 2.5 with the simplejson library installed. # # usage: irkerhook.py [-V] [-n] [--variable=value...] [commit_id...] # # This script is meant to be run in an update or post-commit hook. # Try it with -n to see the notification dumped to stdout and verify # that it looks sane. With -V this script dumps its version and exits. # # See the irkerhook manual page in the distribution for a detailed # explanation of how to configure this hook. # The default location of the irker proxy, if the project configuration # does not override it. # # SPDX-License-Identifier: BSD-2-Clause default_server = "localhost" IRKER_PORT = 6659 # The default service used to turn your web-view URL into a tinyurl so it # will take up less space on the IRC notification line. default_tinyifier = "http://tinyurl.com/api-create.php?url=" # Map magic urlprefix values to actual URL prefixes. 
urlprefixmap = { "viewcvs": "http://%(host)s/viewcvs/%(repo)s?view=revision&revision=", "gitweb": "http://%(host)s/cgi-bin/gitweb.cgi?p=%(repo)s;a=commit;h=", "cgit": "http://%(host)s/cgi-bin/cgit.cgi/%(repo)s/commit/?id=", } # By default, ship to the freenode #commits list default_channels = "irc://chat.freenode.net/#commits" # # No user-serviceable parts below this line: # version = "2.17" import os, sys, socket, urllib2, subprocess, locale, datetime, re from pipes import quote as shellquote try: import simplejson as json # Faster, also makes us Python-2.5-compatible except ImportError: import json try: getstatusoutput = subprocess.getstatusoutput except AttributeError: import commands getstatusoutput = commands.getstatusoutput def do(command): return unicode(getstatusoutput(command)[1], locale.getlocale()[1] or 'UTF-8').encode(locale.getlocale()[1] or 'UTF-8') class Commit: def __init__(self, extractor, commit): "Per-commit data." self.commit = commit self.branch = None self.rev = None self.mail = None self.author = None self.files = None self.logmsg = None self.url = None self.author_date = None self.commit_date = None self.__dict__.update(extractor.__dict__) def __unicode__(self): "Produce a notification string from this commit." if self.urlprefix.lower() == "none": self.url = "" else: urlprefix = urlprefixmap.get(self.urlprefix, self.urlprefix) webview = (urlprefix % self.__dict__) + self.commit try: # See it the url is accessible res = urllib2.urlopen(webview) if self.tinyifier and self.tinyifier.lower() != "none": try: # Didn't get a retrieval error on the web # view, so try to tinyify a reference to it. 
self.url = urllib2.urlopen(self.tinyifier + webview).read() try: self.url = self.url.decode('UTF-8') except UnicodeError: pass except IOError: self.url = webview else: self.url = webview except IOError as e: if e.code == 401: # Authentication error, so we assume the view is valid self.url = webview else: self.url = "" res = self.template % self.__dict__ return unicode(res, 'UTF-8') if not isinstance(res, unicode) else res class GenericExtractor: "Generic class for encapsulating data from a VCS." booleans = ["tcp"] numerics = ["maxchannels"] strings = ["email"] def __init__(self, arguments): self.arguments = arguments self.project = None self.repo = None # These aren't really repo data but they belong here anyway... self.email = None self.tcp = True self.tinyifier = default_tinyifier self.server = None self.channels = None self.maxchannels = 0 self.template = None self.urlprefix = None self.host = socket.getfqdn() self.cialike = None self.filtercmd = None # Color highlighting is disabled by default. self.color = None self.bold = self.green = self.blue = self.yellow = "" self.brown = self.magenta = self.cyan = self.reset = "" def activate_color(self, style): "IRC color codes." if style == 'mIRC': # mIRC colors are mapped as closely to the ANSI colors as # possible. However, bright colors (green, blue, red, # yellow) have been made their dark counterparts since # ChatZilla does not properly darken mIRC colors in the # Light Motif color scheme. self.bold = '\x02' self.green = '\x0303' self.blue = '\x0302' self.red = '\x0305' self.yellow = '\x0307' self.brown = '\x0305' self.magenta = '\x0306' self.cyan = '\x0310' self.reset = '\x0F' if style == 'ANSI': self.bold = '\x1b[1m' self.green = '\x1b[1;32m' self.blue = '\x1b[1;34m' self.red = '\x1b[1;31m' self.yellow = '\x1b[1;33m' self.brown = '\x1b[33m' self.magenta = '\x1b[35m' self.cyan = '\x1b[36m' self.reset = '\x1b[0m' def load_preferences(self, conf): "Load preferences from a file in the repository root." 
if not os.path.exists(conf): return ln = 0 for line in open(conf): ln += 1 if line.startswith("#") or not line.strip(): continue elif line.count('=') != 1: sys.stderr.write('"%s", line %d: missing = in config line\n' \ % (conf, ln)) continue fields = line.split('=') if len(fields) != 2: sys.stderr.write('"%s", line %d: too many fields in config line\n' \ % (conf, ln)) continue variable = fields[0].strip() value = fields[1].strip() if value.lower() == "true": value = True elif value.lower() == "false": value = False # User cannot set maxchannels - only a command-line arg can do that. if variable == "maxchannels": return setattr(self, variable, value) def do_overrides(self): "Make command-line overrides possible." for tok in self.arguments: for key in self.__dict__: if tok.startswith("--" + key + "="): val = tok[len(key)+3:] setattr(self, key, val) for (key, val) in self.__dict__.items(): if key in GenericExtractor.booleans: if type(val) == type("") and val.lower() == "true": setattr(self, key, True) elif type(val) == type("") and val.lower() == "false": setattr(self, key, False) elif key in GenericExtractor.numerics: setattr(self, key, int(val)) elif key in GenericExtractor.strings: setattr(self, key, val) if not self.project: sys.stderr.write("irkerhook.py: no project name set!\n") raise SystemExit(1) if not self.repo: self.repo = self.project.lower() if not self.channels: self.channels = default_channels % self.__dict__ if self.color and self.color.lower() != "none": self.activate_color(self.color) def has(dirname, paths): "Test for existence of a list of paths." # all() is a python2.5 construct for exists in [os.path.exists(os.path.join(dirname, x)) for x in paths]: if not exists: return False return True # VCS-dependent code begins here class GitExtractor(GenericExtractor): "Metadata extraction for the git version control system." 
@staticmethod def is_repository(dirname): # Must detect both ordinary and bare repositories return has(dirname, [".git"]) or \ has(dirname, ["HEAD", "refs", "objects"]) def __init__(self, arguments): GenericExtractor.__init__(self, arguments) # Get all global config variables self.project = do("git config --get irker.project") self.repo = do("git config --get irker.repo") self.server = do("git config --get irker.server") self.channels = do("git config --get irker.channels") self.email = do("git config --get irker.email") self.tcp = do("git config --bool --get irker.tcp") self.template = '%(bold)s%(project)s:%(reset)s %(green)s%(author)s%(reset)s %(repo)s:%(yellow)s%(branch)s%(reset)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' self.tinyifier = do("git config --get irker.tinyifier") or default_tinyifier self.color = do("git config --get irker.color") self.urlprefix = do("git config --get irker.urlprefix") or "gitweb" self.cialike = do("git config --get irker.cialike") self.filtercmd = do("git config --get irker.filtercmd") # These are git-specific self.refname = do("git symbolic-ref HEAD 2>/dev/null") self.revformat = do("git config --get irker.revformat") # The project variable defaults to the name of the repository toplevel. if not self.project: bare = do("git config --bool --get core.bare") if bare.lower() == "true": keyfile = "HEAD" else: keyfile = ".git/HEAD" here = os.getcwd() while True: if os.path.exists(os.path.join(here, keyfile)): self.project = os.path.basename(here) if self.project.endswith('.git'): self.project = self.project[0:-4] break elif here == '/': sys.stderr.write("irkerhook.py: no git repo below root!\n") sys.exit(1) here = os.path.dirname(here) # Get overrides self.do_overrides() def head(self): "Return a symbolic reference to the tip commit of the current branch." return "HEAD" def commit_factory(self, commit_id): "Make a Commit object holding data for a specified commit ID." 
commit = Commit(self, commit_id) commit.branch = re.sub(r"^refs/[^/]*/", "", self.refname) # Compute a description for the revision if self.revformat == 'raw': commit.rev = commit.commit elif self.revformat == 'short': commit.rev = '' else: # self.revformat == 'describe' commit.rev = do("git describe %s 2>/dev/null" % shellquote(commit.commit)) if not commit.rev: # Query git for the abbreviated hash commit.rev = do("git log -1 '--pretty=format:%h' " + shellquote(commit.commit)) if self.urlprefix in ('gitweb', 'cgit'): # Also truncate the commit used for the announced urls commit.commit = commit.rev # Extract the meta-information for the commit commit.files = do("git diff-tree -r --name-only " + shellquote(commit.commit)) commit.files = " ".join(commit.files.strip().split("\n")[1:]) # Design choice: for git we ship only the first message line, which is # conventionally supposed to be a summary of the commit. Under # other VCSes a different choice may be appropriate. commit.author_name, commit.mail, commit.logmsg = \ do("git log -1 '--pretty=format:%an%n%ae%n%s' " + shellquote(commit.commit)).split("\n") # This discards the part of the author's address after @. # Might be be nice to ship the full email address, if not # for spammers' address harvesters - getting this wrong # would make the freenode #commits channel into harvester heaven. commit.author = commit.mail.split("@")[0] commit.author_date, commit.commit_date = \ do("git log -1 '--pretty=format:%ai|%ci' " + shellquote(commit.commit)).split("|") return commit class SvnExtractor(GenericExtractor): "Metadata extraction for the svn version control system." @staticmethod def is_repository(dirname): return has(dirname, ["format", "hooks", "locks"]) def __init__(self, arguments): GenericExtractor.__init__(self, arguments) # Some things we need to have before metadata queries will work self.repository = '.' 
for tok in arguments: if tok.startswith("--repository="): self.repository = tok[13:] self.project = os.path.basename(self.repository) self.template = '%(bold)s%(project)s%(reset)s: %(green)s%(author)s%(reset)s %(repo)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' self.urlprefix = "viewcvs" self.load_preferences(os.path.join(self.repository, "irker.conf")) self.do_overrides() def head(self): sys.stderr.write("irker: under svn, hook requires a commit argument.\n") raise SystemExit(1) def commit_factory(self, commit_id): self.id = commit_id commit = Commit(self, commit_id) commit.branch = "" commit.rev = "r%s" % self.id commit.author = self.svnlook("author") commit.commit_date = self.svnlook("date").partition('(')[0] commit.files = self.svnlook("dirs-changed").strip().replace("\n", " ") commit.logmsg = self.svnlook("log").strip() return commit def svnlook(self, info): return do("svnlook %s %s --revision %s" % (shellquote(info), shellquote(self.repository), shellquote(self.id))) class HgExtractor(GenericExtractor): "Metadata extraction for the Mercurial version control system." @staticmethod def is_repository(directory): return has(directory, [".hg"]) def __init__(self, arguments): # This fiddling with arguments is necessary since the Mercurial hook can # be run in two different ways: either directly via Python (in which # case hg should be pointed to the hg_hook function below) or as a # script (in which case the normal __main__ block at the end of this # file is exercised). In the first case, we already get repository and # ui objects from Mercurial, in the second case, we have to create them # from the root path. 
self.repository = None if arguments and type(arguments[0]) == type(()): # Called from hg_hook function ui, self.repository = arguments[0] arguments = [] # Should not be processed further by do_overrides else: # Called from command line: create repo/ui objects from mercurial import hg, ui as uimod repopath = '.' for tok in arguments: if tok.startswith('--repository='): repopath = tok[13:] ui = uimod.ui() ui.readconfig(os.path.join(repopath, '.hg', 'hgrc'), repopath) self.repository = hg.repository(ui, repopath) GenericExtractor.__init__(self, arguments) # Extract global values from the hg configuration file(s) self.project = ui.config('irker', 'project') self.repo = ui.config('irker', 'repo') self.server = ui.config('irker', 'server') self.channels = ui.config('irker', 'channels') self.email = ui.config('irker', 'email') self.tcp = str(ui.configbool('irker', 'tcp')) # converted to bool again in do_overrides self.template = '%(bold)s%(project)s:%(reset)s %(green)s%(author)s%(reset)s %(repo)s:%(yellow)s%(branch)s%(reset)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' self.tinyifier = ui.config('irker', 'tinyifier') or default_tinyifier self.color = ui.config('irker', 'color') self.urlprefix = (ui.config('irker', 'urlprefix') or ui.config('web', 'baseurl') or '') if self.urlprefix: # self.commit is appended to this by do_overrides self.urlprefix = self.urlprefix.rstrip('/') + '/rev/' self.cialike = ui.config('irker', 'cialike') self.filtercmd = ui.config('irker', 'filtercmd') if not self.project: self.project = os.path.basename(self.repository.root.rstrip('/')) self.do_overrides() def head(self): "Return a symbolic reference to the tip commit of the current branch." return "-1" def commit_factory(self, commit_id): "Make a Commit object holding data for a specified commit ID." 
from mercurial.node import short from mercurial.templatefilters import person node = self.repository.lookup(commit_id) commit = Commit(self, short(node)) # Extract commit-specific values from a "context" object ctx = self.repository.changectx(node) commit.rev = '%d:%s' % (ctx.rev(), commit.commit) commit.branch = ctx.branch() commit.author = person(ctx.user()) commit.author_date = \ datetime.datetime.fromtimestamp(ctx.date()[0]).strftime('%Y-%m-%d %H:%M:%S') commit.logmsg = ctx.description() # Extract changed files from status against first parent st = self.repository.status(ctx.p1().node(), ctx.node()) commit.files = ' '.join(st[0] + st[1] + st[2]) return commit def hg_hook(ui, repo, **kwds): # To be called from a Mercurial "commit", "incoming" or "changegroup" hook. # Example configuration: # [hooks] # incoming.irker = python:/path/to/irkerhook.py:hg_hook extractor = HgExtractor([(ui, repo)]) start = repo[kwds['node']].rev() end = len(repo) if start != end: # changegroup with multiple commits, so we generate a notification # for each one for rev in range(start, end): ship(extractor, rev, False) else: ship(extractor, kwds['node'], False) # The files we use to identify a Subversion repo might occur as content # in a git or hg repo, but the special subdirectories for those are more # reliable indicators. So test for Subversion last. extractors = [GitExtractor, HgExtractor, SvnExtractor] # VCS-dependent code ends here def ship(extractor, commit, debug): "Ship a notification for the specified commit." metadata = extractor.commit_factory(commit) # This is where we apply filtering if extractor.filtercmd: cmd = '%s %s' % (shellquote(extractor.filtercmd), shellquote(json.dumps(metadata.__dict__))) data = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout.read() try: metadata.__dict__.update(json.loads(data)) except ValueError: sys.stderr.write("irkerhook.py: could not decode JSON: %s\n" % data) raise SystemExit(1) # Rewrite the file list if too long. 
The objective here is only # to be easier on the eyes. if extractor.cialike \ and extractor.cialike.lower() != "none" \ and len(metadata.files) > int(extractor.cialike): files = metadata.files.split() dirs = set([d.rpartition('/')[0] for d in files]) if len(dirs) == 1: metadata.files = "(%s files)" % (len(files),) else: metadata.files = "(%s files in %s dirs)" % (len(files), len(dirs)) # Message reduction. The assumption here is that IRC can't handle # lines more than 510 characters long. If we exceed that length, we # try knocking out the file list, on the theory that for notification # purposes the commit text is more important. If it's still too long # there's nothing much can be done other than ship it expecting the IRC # server to truncate. privmsg = unicode(metadata) if len(privmsg) > 510: metadata.files = "" privmsg = unicode(metadata) # Anti-spamming guard. It's deliberate that we get maxchannels not from # the user-filtered metadata but from the extractor data - means repo # administrators can lock in that setting. channels = metadata.channels.split(",") if extractor.maxchannels != 0: channels = channels[:extractor.maxchannels] # Ready to ship. message = json.dumps({"to": channels, "privmsg": privmsg}) if debug: print message elif channels: try: if extractor.email: # We can't really figure out what our SF username is without # exploring our environment. The mail pipeline doesn't care # about who sent the mail, other than being from sourceforge. 
# A better way might be to simply call mail(1) sender = "irker@users.sourceforge.net" msg = """From: %(sender)s Subject: irker json %(message)s""" % {"sender":sender, "message":message} import smtplib smtp = smtplib.SMTP() smtp.connect() smtp.sendmail(sender, extractor.email, msg) smtp.quit() elif extractor.tcp: try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((extractor.server or default_server, IRKER_PORT)) sock.sendall(message + "\n") finally: sock.close() else: try: sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) sock.sendto(message + "\n", (extractor.server or default_server, IRKER_PORT)) finally: sock.close() except socket.error, e: sys.stderr.write("%s\n" % e) if __name__ == "__main__": notify = True repository = os.getcwd() commits = [] for arg in sys.argv[1:]: if arg == '-n': notify = False elif arg == '-V': print "irkerhook.py: version", version sys.exit(0) elif arg.startswith("--repository="): repository = arg[13:] elif not arg.startswith("--"): commits.append(arg) # Figure out which extractor we should be using for candidate in extractors: if candidate.is_repository(repository): cls = candidate break else: sys.stderr.write("irkerhook: cannot identify a repository type.\n") raise SystemExit(1) extractor = cls(sys.argv[1:]) # And apply it. if not commits: commits = [extractor.head()] for commit in commits: ship(extractor, commit, not notify) #End irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/irkerhook.xml000066400000000000000000000333241267153440000231250ustar00rootroot00000000000000 irkerhook 1 Aug 27 2012 irker irker Commands irkerhook repository hook script issuing irker notifications irkerhook.py -n -V --variable=value commit-id DESCRIPTION irkerhook.py is a Python script intended to be called from the post-commit hook of a version-control repository. 
Its job is to collect information about the commit that fired the hook (and possibly preferences set by the repository owner) and ship that information to an instance of irkerd for forwarding to various announcement channels. The proper invocation and behavior of irkerhook.py varies depending on which VCS (version-control system) is calling it. There are four different places from which it may extract information: Calls to VCS utilities. In VCSes like git that support user-settable configuration variables, variables with the prefix "irker.". In other VCSes, a configuration file, "irker.conf", in the repository's internals directory. Command-line arguments of the form --variable=value. The following variables are general to all supported VCSes: project The name of the project. Should be a relatively short identifier; will usually appear at the very beginning of a notification. repo The name of the repository top-level directory. If not specified, defaults to a lowercased copy of the project name. channels An IRC channel URL, or comma-separated list of same, identifying channels to which notifications are to be sent. If not specified, the default is the freenode #commits channel. server The host on which the notification-relaying irker daemon is expected to reside. Defaults to "localhost". email If set, use email for communication rather than TCP or UDP. The value is used as the target mail address. tcp If "true", use TCP for communication; if "false", use UDP. Defaults to "false". urlprefix Changeset URL prefix for your repo. When the commit ID is appended to this, it should point at a CGI that will display the commit through cgit, gitweb or something similar. The defaults will probably work if you have a typical gitweb/cgit setup. If the value of this variable is "None", generation of the URL field in commit notifications will be suppressed. 
Other magic values are "cgit", "gitweb", and "viewcvs", which expand to URL templates that will usually work with those systems. The magic cookies "%(host)s" and "%(repo)s" may occur in this URL. The former is expanded to the FQDN of the host on which irkerhook.py is running; the latter is expanded to the value of the "repo" variable. tinyifier URL template pointing to a service for compressing URLs so they will take up less space in the notification line. If the value of this variable is "None", no compression will be attempted. color If "mIRC", highlight notification fields with mIRC color codes. If "ANSI", highlight notification fields with ANSI color escape sequences. Defaults to "none" (no colors). ANSI codes are supported in Chatzilla, irssi, ircle, and BitchX; mIRC codes are recognized only in mIRC, XChat, KVirc, Konversation, or weechat. Note: if you turn this on and notifications stop appearing on your channel, you need to turn off IRC's color filter on that channel. To do this you will need op privileges; issue the command "/mode <channel> -c" with <channel> replaced by your channel name. You may need to first issue the command "/msg chanserv set <channel> MLOCK +nt-slk". maxchannels Interpreted as an integer. If not zero, limits the number of channels the hook will interpret from the "channels" variable. This variable cannot be set through VCS configuration variables or irker.conf; it can only be set with a command-line argument. Thus, on a forge site in which repository owners are not allowed to modify their post-commit scripts, a site administrator can set it to prevent shotgun spamming by malicious project owners. Setting it to a value less than 2, however, would probably be unwise. cialike If not empty and not "None" (the default), this emulates the old CIA behavior of dropping long lists of files in favor of a summary of the form (N files in M directories). 
The value must be numeric, giving a threshold value for the length of the file list in characters. git Under git, the normal way to invoke this hook (from within the update hook) passes it a refname followed by a list of commits. Because git rev-list normally lists from most recent to oldest, you'll want to use --reverse to make notifications be emitted in chronological order. In a normal update script, the invocation should look like this refname=$1 old=$2 new=$3 irkerhook.py --refname=${refname} $(git rev-list --reverse ${old}..${new}) except that you'll need an absolute path for irkerhook.py. For testing purposes and backward compatibility, if you invoke irkerhook.py with no arguments (as in a post-commit hook) it will behave as though it had been called like this: irkerhook.py --refname=refs/heads/master HEAD However, this will not give the right result when you push to a non-default branch of a bare repo. A typical way to install this hook is actually in the post-receive hook, because it gets all the necessary details and will not abort the push on failure. Use the following script: #!/bin/sh echo "sending IRC notification" while read old new refname; do irkerhook --refname=${refname} $(git rev-list --reverse ${old}..${new}) done Preferences may be set in the repo config file in an [irker] section. Here is an example of what that can look like: [irker] project = gpsd color = ANSI channels = irc://chat.freenode.net/gpsd,irc://chat.freenode.net/commits You should not set the "repository" variable (an equivalent will be computed). No attempt is made to interpret an irker.conf file. The default value of the "project" variable is the basename of the repository directory. The default value of the "urlprefix" variable is "gitweb". There is one git-specific variable, "revformat", controlling the format of the commit identifier in a notification. 
It may have the following values: raw full hex ID of commit short first 12 chars of hex ID describe describe relative to last tag, falling back to short The default is 'describe'. Subversion Under Subversion, irkerhook.py accepts a --repository option with value (the absolute pathname of the Subversion repository) and a commit argument (the numeric revision level of the commit). The defaults are the current working directory and HEAD, respectively. Note, however, that you cannot default the repository argument inside a Subversion post-commit hook; this is because of a limitation of Subversion, which is that getting the current directory is not reliable inside these hooks. Instead, the values must be the two arguments that Subversion passes to that hook as arguments. Thus, a typical invocation in the post-commit script will look like this: REPO=$1 REV=$2 irkerhook.py --repository=$REPO $REV Other --variable=value settings may also be given on the command line, and will override any settings in an irker.conf file. The default for the project variable is the basename of the repository. The default value of the "urlprefix" variable is "viewcvs". If an irker.conf file exists in the repository root directory (not the checkout directory but where internals such as the "format" file live) the hook will interpret variable settings from it. Here is an example of what such a file might look like: # irkerhook variable settings for the irker project project = irker channels = irc://chat.freenode/irker,irc://chat.freenode/commits tcp = false Don't set the "repository" or "commit" variables in this file; that would have unhappy results. There are no Subversion-specific variables. Mercurial Under Mercurial, irkerhook.py can be invoked in two ways: either as a Python hook (preferred) or as a script. 
To call it as a Python hook, add the following to the "commit" or "incoming" hook declaration in your Mercurial repository: [hooks] incoming.irker = python:/path/to/irkerhook.py:hg_hook When called as a script, the hook accepts a --repository option with value (the absolute pathname of the Mercurial repository) and can take a commit argument (the Mercurial hash ID of the commit or a reference to it). The default for the repository argument is the current directory. The default commit argument is '-1', designating the current tip commit. As for git, in both cases all variables may be set in the repo hgrc file in an [irker] section. Command-line variable=value arguments are accepted but not required for script invocation. No attempt is made to interpret an irker.conf file. The default value of the "project" variable is the basename of the repository directory. The default value of the "urlprefix" variable is the value of the "web.baseurl" config value, if it exists. Filtering It is possible to filter commits before sending them to irkerd. You have to specify the filtercmd option, which will be the command irkerhook.py will run. This command should accept one argument, which is a JSON representation of commit and extractor metadata (including the channels variable). The command should emit to standard output a JSON representation of (possibly altered) metadata. Below is an example filter: #!/usr/bin/env python # This is a trivial example of a metadata filter. # All it does is change the name of the commit's author. # import sys, json metadata = json.loads(sys.argv[1]) metadata['author'] = "The Great and Powerful Oz" print json.dumps(metadata) # end Standard error is available to the hook for progress and error messages. OPTIONS irkerhook.py takes the following options: -n Suppress transmission to a daemon. Instead, dump the generated JSON request to standard output. Useful for debugging. -V Write the program version to stdout and terminate. SEE ALSO irkerd(8). AUTHOR Eric S. 
Raymond esr@snark.thyrsus.com. See the project page at http://www.catb.org/~esr/irker for updates and other resources. irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/org.catb.irkerd.plist000066400000000000000000000007361267153440000244420ustar00rootroot00000000000000 KeepAlive Label org.catb.irkerd ProgramArguments /usr/bin/irkerd RunAtLoad UserName nobody GroupName nobody irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/requirements.txt000066400000000000000000000000171267153440000236630ustar00rootroot00000000000000PySocks==1.5.6 irker-2.17-1e520c080cc8d39257e54fa7c89e289faeca9ed5/security.txt000066400000000000000000000265721267153440000230250ustar00rootroot00000000000000= Security analysis of irker = This is an analysis of security and DoS vulnerabilities associated with irker, exploring and explaining certain design choices. Much of it derives from a code audit and report by Daniel Franke. == Assumptions and Goals == We begin by stating some assumptions about how irker will be deployed, and articulating a set of security goals. Communication flow in an irker deployment will look like this: ----------------------------------------------------------------------------- Committers | | Version-control repositories | | irkerhook.py | | irkerd | | IRC servers ----------------------------------------------------------------------------- Here are our assumptions: 1. The repositories are hosted on public forge sites such as SourceForge, GitHub, Gitorious, Savannah, or Gna and must be accessible to untrusted users. 2. Repository project owners can set properties on their repositories (including but not limited to irker.*), and may be able to set custom post-commit hooks which can execute arbitrary code on the repository server. In particular, these people may be able to modify the local copy of irkerhook.py. 3. The machine which hosts irkerd has the same owner as the machine which hosts the repo; these machines are possibly but not necessarily one and the same. 
4. The network is protected by a perimeter firewall, and only a trusted group is able to emit arbitrary packets from inside the perimeter; committers are not necessarily part of this group. 5. irkerd communicates with IRC servers over the open internet, and an IRC server's administrator is assumed to hold no position of trust with any other party. We can, accordingly, identify the following groups of security principals: A. irker administrators. B. Project committers. C. Project owners. D. IRC server administrators. E. Other people on irker's internal network. F. irkerd-IRC men-in-the-middle (i.e. people who control the network path between irkerd and the IRC server). G. Random people on the internet. Our security goals for irker can be enumerated as follows: * Control: We don't want anyone outside group A gaining control of the machines which host irkerd or the git repos. * Availability: Only group A should be able to deny or degrade irkerd's ability to receive commit messages and relay them to the IRC server. We recognize and accept as inevitable that MITMs (groups E and F) can do this too (by ARP spoofing, cable-cutting, etc.). But, in particular, we would like irker-mediated services to be resilient against DoS (denial of service) attacks. * Authentication/integrity: Notifications should be truthful, i.e., commit messages sent to IRC channels should actually reflect that a corresponding commit has taken place. We accept that groups A, C, D, and E can violate this property. * Secrecy: irker shouldn't aid spammers (group G) in harvesting committers' email addresses. * Auditability: If people abuse irkerd, we want to be able to identify the abusive account or IP address. == Control Issues == We have audited the irker and irkerhook.py code for exploitable vulnerabilities. We have not found any in the code itself, and the use of Python gives us confidence in the absence of large classes of errors (such as buffer overruns) that afflict C programs. 
However, the fact that irkerhook.py relies on external binaries to mine data out of its repository opens up a well-known set of vulnerabilities if a malicious user is able to insert binaries in a carelessly-set execution path. Normal precautions against this should be taken. == Availability == === Solved problems === When the original implementation of irkerd saw a nick collision it generated new nicks in a predictable sequence. A malicious IRC user could have continuously changed his own nick to the next one that irkerd is going to try. Some randomness has been added to nick generation to prevent this. === Unsolved problems === DoS attacks on any networked application can never be completely prevented, only mitigated by forcing attackers to invest more resources. Here we consider the easiest attack paths against irker, and possible countermeasures. irker handles each connection to a particular IRC server in a separate thread - actually, due to server limits on open channels per connection, there may be multiple sessions per server. This may not scale well, especially on 32-bit architectures. Thread instance overhead, combined with the lack of any restriction on how many URLs can appear in the 'to' list, is a DoS vulnerability. If a repository's properties specify that notifications should go to more than about 500 unique hostnames, then on 32-bit architectures we'll hit the 4GB cap on virtual memory (even while the resident set size remains small). Another ceiling to watch out for is the ulimit on file descriptors, which defaults to 1024 on many Linux systems but can safely be set much larger. Each connection instance costs a file descriptor. We consider some possible ways of addressing the problem: 1. Limit the number of URLs in a request. Pretty painless - it will be very rare that anyone wants to specify a larger set than a project channel plus freenode #commits - but also ineffective. A malicious hook could achieve DoS simply by spamming lots of requests. 2. 
Limit the total number of requests that can be queued. Completely ineffective - just sets a target for the DoS attack. 3. Limit the number of requests that can be queued by source IP address. This might be worth doing; it would stymie a single-source DoS attack through a publicly-exposed irkerd, though not a DDoS by a botnet. But there isn't a lot of win here for a properly installed irker (e.g. behind a firewall), which is typically going to get all its requests from a single repo host anyway. 4. Rate-limit requests by source IP address - that is, after any request discard additional ones during some timeout period. Again, good for stopping a single-source DoS against an exposed irker, won't stop a DDoS. The real problem, though, is that any such rate limit might interfere with legitimate high-volume use by a very active repo site. After this we appear to have run out of easy options, as source IP address is the only thing irkerd can see that an attacker can't spoof. We mitigate some availability risks by reaping old sessions when we're near resource limits. An ordinary DoS attack would then be prevented from completely blocking all message traffic; the cost would be a whole lot of join/leave spam due to connection churn. == Authentication/Integrity == One way to help prevent DoS attacks would be in-band authentication - requiring irkerd submitters to present a credential along with each message submission. In principle this, if it existed, could also be used to verify that a submitter is authorized to issue notifications with respect to a given project. We rejected this approach. The design goal for irker was to make submissions fast, cheap, and stateless; baking an authentication system directly into the irkerd codebase would have conflicted with these objectives, not to mention probably becoming the camel's nose for a godawful amount of code bloat. 
The deployment advice in the installation instructions assumes that irkerd submitters are "authenticated" by being inside a firewall - that is, messages are issued from an intranet and it can be trusted that anyone issuing messages from within a given intranet is authorized to do so. This fits the assumption that irker instances will run on forge sites receiving requests from instances of irkerhook.py. One larger issue (not unique to irker) is that because of the insecure nature of IRC it is essentially impossible to secure #commits against commit notifications that are either garbled by software errors and misconfigurations or maliciously crafted to confuse anyone attempting to gather statistics from that channel. The lesson here is that IRC monitoring isn't a good method for that purpose; going direct to the repositories via a toolkit such as Ohloh is a far better idea. When this analysis was originally written, we recommended using spiped or stunnel to solve the problem of passing notifications from irkerd to IRC servers over a potentially hostile network that might interfere with them. Later, SSL/TLS support proved easy to add and is now in irkerd itself. == Secrecy == irkerd has no inherent secrecy risks. The distributed version of irkerhook.py removes the host part of author addresses specifically in order to prevent address harvesting from the notifications. == Auditability == We previously noted that source IP address is the only thing irker can see that an attacker can't spoof. This makes auditability difficult unless we impose conventions on the notifications passing through it. The irkerhook.py that we ship inherits an auditability property from the CIA service it was designed to replace: the first field of every notification (terminated by a colon) is the name of the issuing project. The only other competitor to replace CIA known to us (kgb_bot) shares this property. In the general case we cannot guarantee this property against groups A and F. 
== Risks relative to centralized services == irker and irkerhook.py were written as a replacement for the now-defunct CIA notification service. The author has written a critique of that service: "CIA and the perils of overengineering" at . It is thus worth considering how a risk assessment of CIA compares to this one. The principal advantages of CIA from a security point of view were (a) it provided a single point at which spam filtering and source blocking could be done with benefit to all projects using the service, and (b) since it had to have a database anyway for routing messages to project channels, the incremental overhead for an authentication feature would have been relatively low. As a matter of fact rather than theory CIA never fully exploited either possibility. Anyone could create a CIA project entry with fanout to any desired set of IRC channels. Notifications were not authenticated, so anyone could masquerade as a member of any project. The only check on abuse was human intervention to source-block spammers, and this was by no means completely effective - spam shipped via CIA was occasionally seen on the freenode #commits channel. The principal security disadvantage of CIA was that it meant the entire notification system was subject to single-point failure due to software or hosting failures on cia.vc, or to DoS attacks against the server. While there is no evidence that the site was ever deliberately DoSed, failures were sufficiently common that a half-hearted DoS attack might not even have been noticed. Despite the absence of authentication, irker instances on properly firewalled intranets do not obviously pose additional spamming risks beyond those incurred by the CIA service. The overall robustness of the notification system as a whole should be greatly improved. 
== Conclusions == The security and DoS issues irker has are not readily addressable by changing the irker codebase itself, short of a complete (much more complex and heavyweight) redesign. They are largely implicit risks of its operating environment and must be managed by properly controlling access to irker instances.