pax_global_header00006660000000000000000000000064130212616520014510gustar00rootroot0000000000000052 comment=b4ba4f9b68b7b71db0714f8bb28577d45c286ead python-pgq/000077500000000000000000000000001302126165200131625ustar00rootroot00000000000000python-pgq/.coveragerc000066400000000000000000000000731302126165200153030ustar00rootroot00000000000000[report] exclude_lines = ^try: ^except pragma: no cover python-pgq/.gitignore000066400000000000000000000002551302126165200151540ustar00rootroot00000000000000__pycache__ *.pyc *.swp *.o *.so *.egg-info *.debhelper *.log *.substvars *-stamp debian/files debian/python-* debian/python3-* .tox .coverage .pybuild MANIFEST build tmp python-pgq/.prospector.yaml000066400000000000000000000010611302126165200163220ustar00rootroot00000000000000 strictness: high max-line-length: 120 member-warnings: true doc-warnings: false test-warnings: false pylint: options: init-import: true dummy-variables-rgx: _$|dummy|___ disable: # register_consumer - arguments-differ # optparse - deprecated-module - bare-except - invalid-name - no-self-use - protected-access - too-many-arguments - too-many-branches - too-many-instance-attributes - too-many-lines - too-many-locals - too-many-statements - unused-argument mccabe: run: false python-pgq/Makefile000066400000000000000000000004051302126165200146210ustar00rootroot00000000000000 all: clean: rm -rf build *.egg-info */__pycache__ tests/*.pyc rm -rf debian/python-* debian/files debian/*.log rm -rf debian/*.substvars debian/*.debhelper debian/*-stamp rm -rf .pybuild MANIFEST deb: debuild -us -uc -b xclean: clean rm -rf .tox dist python-pgq/debian/000077500000000000000000000000001302126165200144045ustar00rootroot00000000000000python-pgq/debian/changelog000066400000000000000000000001731302126165200162570ustar00rootroot00000000000000python-pgq (3.3.0-1) stable; urgency=low * v3.3.0 -- Marko Kreen Fri, 04 Dec 2015 17:00:23 +0200 
python-pgq/debian/compat000066400000000000000000000000021302126165200156020ustar00rootroot000000000000009 python-pgq/debian/control000066400000000000000000000010711302126165200160060ustar00rootroot00000000000000Source: python-pgq Section: python Priority: optional Maintainer: Marko Kreen Standards-Version: 3.9.2 Build-Depends: debhelper (>= 9), dh-python, python-all, python3-all, python-setuptools, python3-setuptools X-Python-Version: >= 2.7 X-Python3-Version: >= 3.5 Package: python-pgq Architecture: all Conflicts: python-pgq3 Depends: ${misc:Depends}, ${python:Depends} Description: PgQ queue processing for Python . Package: python3-pgq Architecture: all Depends: ${misc:Depends}, ${python3:Depends} Description: PgQ queue processing for Python . python-pgq/debian/copyright000066400000000000000000000003011302126165200163310ustar00rootroot00000000000000Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Source: https://github.com/pgq/python-pgq Files: * Copyright: Copyright (c) 2007-2016, Skytools Authors License: ISC python-pgq/debian/py3dist-overrides000066400000000000000000000000641302126165200177260ustar00rootroot00000000000000skytools python3-skytools psycopg2 python3-psycopg2 python-pgq/debian/pydist-overrides000066400000000000000000000000621302126165200176410ustar00rootroot00000000000000skytools python-skytools psycopg2 python-psycopg2 python-pgq/debian/rules000077500000000000000000000002421302126165200154620ustar00rootroot00000000000000#! 
/usr/bin/make -f #export DH_VERBOSE = 1 export DEB_BUILD_OPTIONS = nocheck export PYBUILD_NAME = pgq %: dh $@ --with python2,python3 --buildsystem=pybuild python-pgq/debian/source/000077500000000000000000000000001302126165200157045ustar00rootroot00000000000000python-pgq/debian/source/format000066400000000000000000000000141302126165200171120ustar00rootroot000000000000003.0 (quilt) python-pgq/pgq/000077500000000000000000000000001302126165200137515ustar00rootroot00000000000000python-pgq/pgq/__init__.py000066400000000000000000000015771302126165200160740ustar00rootroot00000000000000"""PgQ framework for Python.""" from __future__ import division, absolute_import, print_function from pgq.event import Event from pgq.consumer import Consumer from pgq.coopconsumer import CoopConsumer from pgq.localconsumer import LocalConsumer from pgq.producer import bulk_insert_events, insert_event from pgq.remoteconsumer import RemoteConsumer, SerialConsumer from pgq.status import PGQStatus from pgq.cascade.admin import CascadeAdmin from pgq.cascade.consumer import CascadedConsumer from pgq.cascade.nodeinfo import MemberInfo, NodeInfo, QueueInfo from pgq.cascade.worker import CascadedWorker __all__ = [ 'Event', 'Consumer', 'CoopConsumer', 'LocalConsumer', 'bulk_insert_events', 'insert_event', 'RemoteConsumer', 'SerialConsumer', 'PGQStatus', 'CascadeAdmin', 'CascadedConsumer', 'CascadedWorker', 'MemberInfo', 'NodeInfo', 'QueueInfo' ] __version__ = '3.3' python-pgq/pgq/baseconsumer.py000066400000000000000000000255611302126165200170220ustar00rootroot00000000000000 """PgQ consumer framework for Python. todo: - pgq.next_batch_details() - tag_done() by default """ from __future__ import division, absolute_import, print_function import sys import time import skytools from pgq.event import Event __all__ = ['BaseConsumer', 'BaseBatchWalker'] class BaseBatchWalker(object): """Lazy iterator over batch events. Events are loaded using cursor. It will be given as ev_list to process_batch(). 
It allows: - one for loop over events - len() after that """ def __init__(self, curs, batch_id, queue_name, fetch_size=300, consumer_filter=None): self.queue_name = queue_name self.fetch_size = fetch_size self.sql_cursor = "batch_walker" self.curs = curs self.length = 0 self.batch_id = batch_id self.fetch_status = 0 # 0-not started, 1-in-progress, 2-done self.consumer_filter = consumer_filter def _make_event(self, queue_name, row): return Event(queue_name, row) def __iter__(self): if self.fetch_status: raise Exception("BatchWalker: double fetch? (%d)" % self.fetch_status) self.fetch_status = 1 q = "select * from pgq.get_batch_cursor(%s, %s, %s, %s)" self.curs.execute(q, [self.batch_id, self.sql_cursor, self.fetch_size, self.consumer_filter]) # this will return first batch of rows q = "fetch %d from %s" % (self.fetch_size, self.sql_cursor) while 1: rows = self.curs.fetchall() if not len(rows): break self.length += len(rows) for row in rows: ev = self._make_event(self.queue_name, row) yield ev # if less rows than requested, it was final block if len(rows) < self.fetch_size: break # request next block of rows self.curs.execute(q) self.curs.execute("close %s" % self.sql_cursor) self.fetch_status = 2 def __len__(self): return self.length class BaseConsumer(skytools.DBScript): """Consumer base class. 
Do not subclass directly (use pgq.Consumer or pgq.LocalConsumer instead) Config template:: ## Parameters for pgq.Consumer ## # queue name to read from queue_name = # override consumer name #consumer_name = %(job_name)s # filter out only events for specific tables #table_filter = table1, table2 # whether to use cursor to fetch events (0 disables) #pgq_lazy_fetch = 300 # whether to read from source size in autocommmit mode # not compatible with pgq_lazy_fetch # the actual user script on top of pgq.Consumer must also support it #pgq_autocommit = 0 # whether to wait for specified number of events, # before assigning a batch (0 disables) #pgq_batch_collect_events = 0 # whether to wait specified amount of time, # before assigning a batch (postgres interval) #pgq_batch_collect_interval = # whether to stay behind queue top (postgres interval) #pgq_keep_lag = # in how many seconds to write keepalive stats for idle consumers # this stats is used for detecting that consumer is still running #keepalive_stats = 300 """ # by default, use cursor-based fetch default_lazy_fetch = 300 # should reader connection be used in autocommit mode pgq_autocommit = 0 # proper variables consumer_name = None queue_name = None # compat variables pgq_queue_name = None pgq_consumer_id = None pgq_lazy_fetch = None pgq_min_count = None pgq_min_interval = None pgq_min_lag = None batch_info = None consumer_filter = None keepalive_stats = None # statistics: time spent waiting for events idle_start = None _batch_walker_class = BaseBatchWalker def __init__(self, service_name, db_name, args): """Initialize new consumer. 
@param service_name: service_name for DBScript @param db_name: name of database for get_database() @param args: cmdline args for DBScript """ super(BaseConsumer, self).__init__(service_name, args) self.db_name = db_name # compat params self.consumer_name = self.cf.get("pgq_consumer_id", '') self.queue_name = self.cf.get("pgq_queue_name", '') # proper params if not self.consumer_name: self.consumer_name = self.cf.get("consumer_name", self.job_name) if not self.queue_name: self.queue_name = self.cf.get("queue_name") self.stat_batch_start = 0 # compat vars self.pgq_queue_name = self.queue_name self.consumer_id = self.consumer_name # set default just once self.pgq_autocommit = self.cf.getint("pgq_autocommit", self.pgq_autocommit) if self.pgq_autocommit and self.pgq_lazy_fetch: raise skytools.UsageError("pgq_autocommit is not compatible with pgq_lazy_fetch") self.set_database_defaults(self.db_name, autocommit=self.pgq_autocommit) self.idle_start = time.time() def reload(self): skytools.DBScript.reload(self) self.pgq_lazy_fetch = self.cf.getint("pgq_lazy_fetch", self.default_lazy_fetch) # set following ones to None if not set self.pgq_min_count = self.cf.getint("pgq_batch_collect_events", 0) or None self.pgq_min_interval = self.cf.get("pgq_batch_collect_interval", '') or None self.pgq_min_lag = self.cf.get("pgq_keep_lag", '') or None # filter out specific tables only tfilt = [] for t in self.cf.getlist('table_filter', ''): tfilt.append(skytools.quote_literal(skytools.fq_name(t))) if len(tfilt) > 0: expr = "ev_extra1 in (%s)" % ','.join(tfilt) self.consumer_filter = expr self.keepalive_stats = self.cf.getint("keepalive_stats", 300) def startup(self): """Handle commands here. 
__init__ does not have error logging.""" if self.options.register: self.register_consumer() sys.exit(0) if self.options.unregister: self.unregister_consumer() sys.exit(0) return skytools.DBScript.startup(self) def init_optparse(self, parser=None): p = super(BaseConsumer, self).init_optparse(parser) p.add_option('--register', action='store_true', help='register consumer on queue') p.add_option('--unregister', action='store_true', help='unregister consumer from queue') return p def process_event(self, db, event): """Process one event. Should be overridden by user code. """ raise Exception("needs to be implemented") def process_batch(self, db, batch_id, event_list): """Process all events in batch. By default calls process_event for each. Can be overridden by user code. """ for ev in event_list: self.process_event(db, ev) def work(self): """Do the work loop, once (internal). Returns: true if wants to be called again, false if script can sleep. """ db = self.get_database(self.db_name) curs = db.cursor() self.stat_start() # acquire batch batch_id = self._load_next_batch(curs) db.commit() if batch_id is None: return 0 # load events ev_list = self._load_batch_events(curs, batch_id) db.commit() # process events self._launch_process_batch(db, batch_id, ev_list) # done self._finish_batch(curs, batch_id, ev_list) db.commit() self.stat_end(len(ev_list)) return 1 def register_consumer(self): self.log.info("Registering consumer on source queue") db = self.get_database(self.db_name) cx = db.cursor() cx.execute("select pgq.register_consumer(%s, %s)", [self.queue_name, self.consumer_name]) res = cx.fetchone()[0] db.commit() return res def unregister_consumer(self): self.log.info("Unregistering consumer from source queue") db = self.get_database(self.db_name) cx = db.cursor() cx.execute("select pgq.unregister_consumer(%s, %s)", [self.queue_name, self.consumer_name]) db.commit() def _launch_process_batch(self, db, batch_id, ev_list): self.process_batch(db, batch_id, ev_list) def 
_make_event(self, queue_name, row): return Event(queue_name, row) def _load_batch_events_old(self, curs, batch_id): """Fetch all events for this batch.""" # load events sql = "select * from pgq.get_batch_events(%d)" % batch_id if self.consumer_filter is not None: sql += " where %s" % self.consumer_filter curs.execute(sql) rows = curs.fetchall() # map them to python objects ev_list = [] for r in rows: ev = self._make_event(self.queue_name, r) ev_list.append(ev) return ev_list def _load_batch_events(self, curs, batch_id): """Fetch all events for this batch.""" if self.pgq_lazy_fetch: return self._batch_walker_class(curs, batch_id, self.queue_name, self.pgq_lazy_fetch, self.consumer_filter) else: return self._load_batch_events_old(curs, batch_id) def _load_next_batch(self, curs): """Allocate next batch. (internal)""" q = """select * from pgq.next_batch_custom(%s, %s, %s, %s, %s)""" curs.execute(q, [self.queue_name, self.consumer_name, self.pgq_min_lag, self.pgq_min_count, self.pgq_min_interval]) inf = curs.fetchone().copy() inf['tick_id'] = inf['cur_tick_id'] inf['batch_end'] = inf['cur_tick_time'] inf['batch_start'] = inf['prev_tick_time'] inf['seq_start'] = inf['prev_tick_event_seq'] inf['seq_end'] = inf['cur_tick_event_seq'] self.batch_info = inf return self.batch_info['batch_id'] def _finish_batch(self, curs, batch_id, ev_list): """Tag events and notify that the batch is done.""" curs.execute("select pgq.finish_batch(%s)", [batch_id]) def stat_start(self): t = time.time() self.stat_batch_start = t if self.stat_batch_start - self.idle_start > self.keepalive_stats: self.stat_put('idle', round(self.stat_batch_start - self.idle_start, 4)) self.idle_start = t def stat_end(self, count): t = time.time() self.stat_put('count', count) self.stat_put('duration', round(t - self.stat_batch_start, 4)) if count > 0: # reset timer if we got some events self.stat_put('idle', round(self.stat_batch_start - self.idle_start, 4)) self.idle_start = t 
python-pgq/pgq/cascade/000077500000000000000000000000001302126165200153345ustar00rootroot00000000000000python-pgq/pgq/cascade/__init__.py000066400000000000000000000001411302126165200174410ustar00rootroot00000000000000"""Cascaded Queue support.""" from __future__ import division, absolute_import, print_function python-pgq/pgq/cascade/admin.py000066400000000000000000001556701302126165200170140ustar00rootroot00000000000000#! /usr/bin/env python ## NB: not all commands work ## """Cascaded queue administration. londiste.py INI pause [NODE [CONS]] setadm.py INI pause NODE [CONS] """ from __future__ import division, absolute_import, print_function import optparse import os.path import sys import threading import time try: import queue as Queue except ImportError: import Queue # noqa import skytools from skytools import UsageError, DBError from pgq.cascade.nodeinfo import NodeInfo, QueueInfo __all__ = ['CascadeAdmin'] RESURRECT_DUMP_FILE = "resurrect-lost-events.json" command_usage = """\ %prog [options] INI CMD [subcmd args] Node Initialization: create-root NAME [PUBLIC_CONNSTR] create-branch NAME [PUBLIC_CONNSTR] --provider= create-leaf NAME [PUBLIC_CONNSTR] --provider= All of the above initialize a node Node Administration: pause Pause node worker resume Resume node worker wait-root Wait until node has caught up with root wait-provider Wait until node has caught up with provider status Show cascade state node-status Show status of local node members Show members in set Cascade layout change: change-provider --provider NEW_NODE Change where worker reads from takeover FROM_NODE [--all] [--dead] Take other node position drop-node NAME Remove node from cascade tag-dead NODE .. Tag node as dead tag-alive NODE .. 
Tag node as alive """ standalone_usage = """ setadm extra switches: pause/resume/change-provider: --node=NODE_NAME | --consumer=CONSUMER_NAME create-root/create-branch/create-leaf: --worker=WORKER_NAME """ class CascadeAdmin(skytools.AdminScript): """Cascaded PgQ administration.""" queue_name = None queue_info = None extra_objs = [] local_node = None root_node_name = None commands_without_pidfile = ['status', 'node-status', 'node-info'] def __init__(self, svc_name, dbname, args, worker_setup=False): super(CascadeAdmin, self).__init__(svc_name, args) self.initial_db_name = dbname if worker_setup: self.options.worker = self.job_name self.options.consumer = self.job_name def init_optparse(self, parser=None): """Add SetAdmin switches to parser.""" p = super(CascadeAdmin, self).init_optparse(parser) usage = command_usage + standalone_usage p.set_usage(usage.strip()) g = optparse.OptionGroup(p, "actual queue admin options") g.add_option("--connstr", action="store_true", help="initial connect string") g.add_option("--provider", help="init: connect string for provider") g.add_option("--queue", help="specify queue name") g.add_option("--worker", help="create: specify worker name") g.add_option("--node", help="specify node name") g.add_option("--consumer", help="specify consumer name") g.add_option("--target", help="takeover: specify node to take over") g.add_option("--merge", help="create-node: combined queue name") g.add_option("--dead", action="append", help="tag some node as dead") g.add_option("--dead-root", action="store_true", help="tag some node as dead") g.add_option("--dead-branch", action="store_true", help="tag some node as dead") g.add_option("--sync-watermark", help="list of node names to sync with") g.add_option("--nocheck", action="store_true", help="create: do not check public connect string") p.add_option_group(g) return p def reload(self): """Reload config.""" skytools.AdminScript.reload(self) if self.options.queue: self.queue_name = self.options.queue 
else: self.queue_name = self.cf.get('queue_name', '') if not self.queue_name: self.queue_name = self.cf.get('pgq_queue_name', '') if not self.queue_name: raise Exception('"queue_name" not specified in config') # # Node initialization. # def cmd_install(self): db = self.get_database(self.initial_db_name) self.install_code(db) def cmd_create_root(self, *args): return self.create_node('root', args) def cmd_create_branch(self, *args): return self.create_node('branch', args) def cmd_create_leaf(self, *args): return self.create_node('leaf', args) def create_node(self, node_type, args): """Generic node init.""" if node_type not in ('root', 'branch', 'leaf'): raise Exception('unknown node type') # load node name if len(args) > 0: node_name = args[0] else: node_name = self.cf.get('node_name', '') if not node_name: raise UsageError('Node name must be given either in command line or config') # load node public location if len(args) > 1: node_location = args[1] else: node_location = self.cf.get('public_node_location', '') if not node_location: raise UsageError('Node public location must be given either in command line or config') if len(args) > 2: raise UsageError('Too many args, only node name and public connect string allowed') # load provider provider_loc = self.options.provider if not provider_loc: provider_loc = self.cf.get('initial_provider_location', '') # check if sane ok = 0 for k, _ in skytools.parse_connect_string(node_location): if k in ('host', 'service'): ok = 1 break if not ok: self.log.warning('No host= in public connect string, bad idea') # connect to database db = self.get_database(self.initial_db_name) # check if code is installed self.install_code(db) # query current status res = self.exec_query(db, "select * from pgq_node.get_node_info(%s)", [self.queue_name]) info = res[0] if info['node_type'] is not None: self.log.info("Node is already initialized as %s", info['node_type']) return # check if public connstr is sane self.check_public_connstr(db, 
node_location) self.log.info("Initializing node") node_attrs = {} worker_name = self.options.worker if not worker_name: raise Exception('--worker required') combined_queue = self.options.merge if combined_queue and node_type != 'leaf': raise Exception('--merge can be used only for leafs') if self.options.sync_watermark: if node_type != 'branch': raise UsageError('--sync-watermark can be used only for branch nodes') node_attrs['sync_watermark'] = self.options.sync_watermark # register member if node_type == 'root': global_watermark = None combined_queue = None provider_name = None self.exec_cmd(db, "select * from pgq_node.register_location(%s, %s, %s, false)", [self.queue_name, node_name, node_location]) self.exec_cmd(db, "select * from pgq_node.create_node(%s, %s, %s, %s, %s, %s, %s)", [self.queue_name, node_type, node_name, worker_name, provider_name, global_watermark, combined_queue]) provider_db = None else: if not provider_loc: raise Exception('Please specify --provider') root_db = self.find_root_db(provider_loc) queue_info = self.load_queue_info(root_db) # check if member already exists if queue_info.get_member(node_name) is not None: self.log.error("Node '%s' already exists", node_name) sys.exit(1) provider_db = self.get_database('provider_db', connstr=provider_loc, profile='remote') q = "select node_type, node_name from pgq_node.get_node_info(%s)" res = self.exec_query(provider_db, q, [self.queue_name]) row = res[0] if not row['node_name']: raise Exception("provider node not found") provider_name = row['node_name'] # register member on root self.exec_cmd(root_db, "select * from pgq_node.register_location(%s, %s, %s, false)", [self.queue_name, node_name, node_location]) # lookup provider provider = queue_info.get_member(provider_name) if not provider: self.log.error("Node %s does not exist", provider_name) sys.exit(1) # register on provider self.exec_cmd(provider_db, "select * from pgq_node.register_location(%s, %s, %s, false)", [self.queue_name, node_name, 
node_location]) rows = self.exec_cmd(provider_db, "select * from pgq_node.register_subscriber(%s, %s, %s, null)", [self.queue_name, node_name, worker_name]) global_watermark = rows[0]['global_watermark'] # initialize node itself # insert members self.exec_cmd(db, "select * from pgq_node.register_location(%s, %s, %s, false)", [self.queue_name, node_name, node_location]) for m in queue_info.member_map.values(): self.exec_cmd(db, "select * from pgq_node.register_location(%s, %s, %s, %s)", [self.queue_name, m.name, m.location, m.dead]) # real init self.exec_cmd(db, "select * from pgq_node.create_node(%s, %s, %s, %s, %s, %s, %s)", [self.queue_name, node_type, node_name, worker_name, provider_name, global_watermark, combined_queue]) self.extra_init(node_type, db, provider_db) if node_attrs: s_attrs = skytools.db_urlencode(node_attrs) self.exec_cmd(db, "select * from pgq_node.set_node_attrs(%s, %s)", [self.queue_name, s_attrs]) self.log.info("Done") def check_public_connstr(self, db, pub_connstr): """Look if public and local connect strings point to same db's. 
""" if self.options.nocheck: return pub_db = self.get_database("pub_db", connstr=pub_connstr, profile='remote') curs1 = db.cursor() curs2 = pub_db.cursor() q = "select oid, datname, txid_current() as txid, txid_current_snapshot() as snap"\ " from pg_catalog.pg_database where datname = current_database()" curs1.execute(q) res1 = curs1.fetchone() db.commit() curs2.execute(q) res2 = curs2.fetchone() pub_db.commit() curs1.execute(q) res3 = curs1.fetchone() db.commit() self.close_database("pub_db") failure = 0 if (res1['oid'], res1['datname']) != (res2['oid'], res2['datname']): failure += 1 sn1 = skytools.Snapshot(res1['snap']) tx = res2['txid'] sn2 = skytools.Snapshot(res3['snap']) if sn1.contains(tx): failure += 2 elif not sn2.contains(tx): failure += 4 if failure: raise UsageError("Public connect string points to different database" " than local connect string (fail=%d)" % failure) def extra_init(self, node_type, node_db, provider_db): """Callback to do specific init.""" pass def find_root_db(self, initial_loc=None): """Find root node, having start point.""" if initial_loc: loc = initial_loc db = self.get_database('root_db', connstr=loc, profile='remote') else: loc = self.cf.get(self.initial_db_name) db = self.get_database('root_db', connstr=loc) while 1: # query current status res = self.exec_query(db, "select * from pgq_node.get_node_info(%s)", [self.queue_name]) info = res[0] node_type = info['node_type'] if node_type is None: self.log.info("Root node not initialized?") sys.exit(1) self.log.debug("db='%s' -- type='%s' provider='%s'", loc, node_type, info['provider_location']) # configured db may not be root anymore, walk upwards then if node_type in ('root', 'combined-root'): db.commit() self.root_node_name = info['node_name'] return db self.close_database('root_db') if loc == info['provider_location']: raise Exception("find_root_db: got loop: %s" % loc) loc = info['provider_location'] if loc is None: self.log.error("Sub node provider not initialized?") 
sys.exit(1) db = self.get_database('root_db', connstr=loc, profile='remote') raise Exception('process canceled') def find_root_node(self): self.find_root_db() return self.root_node_name def find_consumer_check(self, node, consumer): cmap = self.get_node_consumer_map(node) return consumer in cmap def find_consumer(self, node=None, consumer=None): if not node and not consumer: node = self.options.node consumer = self.options.consumer if not node and not consumer: raise Exception('Need either --node or --consumer') # specific node given if node: if consumer: if not self.find_consumer_check(node, consumer): raise Exception('Consumer not found') else: state = self.get_node_info(node) consumer = state.worker_name return (node, consumer) # global consumer search if self.find_consumer_check(self.local_node, consumer): return (self.local_node, consumer) # fixme: dead node handling? nodelist = self.queue_info.member_map.keys() for node in nodelist: if node == self.local_node: continue if self.find_consumer_check(node, consumer): return (node, consumer) raise Exception('Consumer not found') def install_code(self, db): """Install cascading code to db.""" objs = [ skytools.DBLanguage("plpgsql"), #skytools.DBFunction("txid_current_snapshot", 0, sql_file="txid.sql"), skytools.DBSchema("pgq", sql="create extension pgq"), #skytools.DBFunction("pgq.get_batch_cursor", 3, sql_file="pgq.upgrade.2to3.sql"), #skytools.DBSchema("pgq_ext", sql_file="pgq_ext.sql"), # not needed actually skytools.DBSchema("pgq_node", sql="create extension pgq_node"), ] objs += self.extra_objs skytools.db_install(db.cursor(), objs, self.log) db.commit() # # Print status of whole set. 
# def cmd_status(self): """Show set status.""" self.load_local_info() # prepare data for workers members = Queue.Queue() for m in self.queue_info.member_map.values(): cstr = self.add_connect_string_profile(m.location, 'remote') members.put((m.name, cstr)) nodes = Queue.Queue() # launch workers and wait num_nodes = len(self.queue_info.member_map) num_threads = max(min(num_nodes // 4, 100), 1) tlist = [] for _ in range(num_threads): t = threading.Thread(target=self._cmd_status_worker, args=(members, nodes)) t.daemon = True t.start() tlist.append(t) #members.join() for t in tlist: t.join() while True: try: node = nodes.get_nowait() except Queue.Empty: break self.queue_info.add_node(node) self.queue_info.print_tree() def _cmd_status_worker(self, members, nodes): # members in, nodes out, both thread-safe while True: try: node_name, node_connstr = members.get_nowait() except Queue.Empty: break node = self.load_node_status(node_name, node_connstr) nodes.put(node) members.task_done() def load_node_status(self, name, location): """ Load node info & status """ # must be thread-safe (!) if not self.node_alive(name): node = NodeInfo(self.queue_name, None, node_name=name) return node try: db = None db = skytools.connect_database(location) db.set_isolation_level(skytools.I_AUTOCOMMIT) curs = db.cursor() curs.execute("select * from pgq_node.get_node_info(%s)", [self.queue_name]) node = NodeInfo(self.queue_name, curs.fetchone()) node.load_status(curs) self.load_extra_status(curs, node) except DBError as d: msg = str(d).strip().split('\n', 1)[0].strip() print('Node %r failure: %s' % (name, msg)) node = NodeInfo(self.queue_name, None, node_name=name) finally: if db: db.close() return node def cmd_node_status(self): """ Show status of a local node. 
""" self.load_local_info() db = self.get_node_database(self.local_node) curs = db.cursor() node = self.queue_info.local_node node.load_status(curs) self.load_extra_status(curs, node) subscriber_nodes = self.get_node_subscriber_list(self.local_node) offset = 4 * ' ' print(node.get_title()) print(offset + 'Provider: %s' % node.provider_node) print(offset + 'Subscribers: %s' % ', '.join(subscriber_nodes)) for l in node.get_infolines(): print(offset + l) def load_extra_status(self, curs, node): """Fetch extra info.""" # must be thread-safe (!) pass # # Normal commands. # def cmd_change_provider(self): """Change node provider.""" self.load_local_info() self.change_provider(node=self.options.node, consumer=self.options.consumer, new_provider=self.options.provider) def node_change_provider(self, node, new_provider): self.change_provider(node, new_provider=new_provider) def change_provider(self, node=None, consumer=None, new_provider=None): old_provider = None if not new_provider: raise Exception('Please give --provider') if not node or not consumer: node, consumer = self.find_consumer(node=node, consumer=consumer) if node == new_provider: raise UsageError("cannot subscribe to itself") cmap = self.get_node_consumer_map(node) cinfo = cmap[consumer] old_provider = cinfo['provider_node'] if old_provider == new_provider: self.log.info("Consumer '%s' at node '%s' has already '%s' as provider", consumer, node, new_provider) return # pause target node self.pause_consumer(node, consumer) # reload node info node_db = self.get_node_database(node) qinfo = self.load_queue_info(node_db) ninfo = qinfo.local_node node_location = qinfo.get_member(node).location # reload consumer info cmap = self.get_node_consumer_map(node) cinfo = cmap[consumer] # is it node worker or plain consumer? 
is_worker = (ninfo.worker_name == consumer) # fixme: expect the node to be described already q = "select * from pgq_node.register_location(%s, %s, %s, false)" self.node_cmd(new_provider, q, [self.queue_name, node, node_location]) # subscribe on new provider if is_worker: q = 'select * from pgq_node.register_subscriber(%s, %s, %s, %s)' self.node_cmd(new_provider, q, [self.queue_name, node, consumer, cinfo['last_tick_id']]) else: q = 'select * from pgq.register_consumer_at(%s, %s, %s)' self.node_cmd(new_provider, q, [self.queue_name, consumer, cinfo['last_tick_id']]) # change provider on target node q = 'select * from pgq_node.change_consumer_provider(%s, %s, %s)' self.node_cmd(node, q, [self.queue_name, consumer, new_provider]) # done self.resume_consumer(node, consumer) # unsubscribe from old provider try: if is_worker: q = "select * from pgq_node.unregister_subscriber(%s, %s)" self.node_cmd(old_provider, q, [self.queue_name, node]) else: q = "select * from pgq.unregister_consumer(%s, %s)" self.node_cmd(old_provider, q, [self.queue_name, consumer]) except skytools.DBError as d: self.log.warning("failed to unregister from old provider (%s): %s", old_provider, str(d)) def cmd_rename_node(self, old_name, new_name): """Rename node.""" self.load_local_info() root_db = self.find_root_db() # pause target node self.pause_node(old_name) node = self.load_node_info(old_name) provider_node = node.provider_node subscriber_list = self.get_node_subscriber_list(old_name) # create copy of member info / subscriber+queue info step1 = 'select * from pgq_node.rename_node_step1(%s, %s, %s)' # rename node itself, drop copies step2 = 'select * from pgq_node.rename_node_step2(%s, %s, %s)' # step1 self.exec_cmd(root_db, step1, [self.queue_name, old_name, new_name]) self.node_cmd(provider_node, step1, [self.queue_name, old_name, new_name]) self.node_cmd(old_name, step1, [self.queue_name, old_name, new_name]) for child in subscriber_list: self.node_cmd(child, step1, [self.queue_name, 
old_name, new_name]) # step1 self.node_cmd(old_name, step2, [self.queue_name, old_name, new_name]) self.node_cmd(provider_node, step1, [self.queue_name, old_name, new_name]) for child in subscriber_list: self.node_cmd(child, step2, [self.queue_name, old_name, new_name]) self.exec_cmd(root_db, step2, [self.queue_name, old_name, new_name]) # resume node self.resume_node(old_name) def cmd_drop_node(self, node_name): """Drop a node.""" self.load_local_info() node = None try: node = self.load_node_info(node_name) if node: # see if we can safely drop subscriber_list = self.get_node_subscriber_list(node_name) if subscriber_list: raise UsageError('node still has subscribers') except skytools.DBError: pass try: # unregister node location from root node (event will be added to queue) if node and node.type == 'root': pass else: root_db = self.find_root_db() q = "select * from pgq_node.unregister_location(%s, %s)" self.exec_cmd(root_db, q, [self.queue_name, node_name]) except skytools.DBError as d: self.log.warning("Unregister from root failed: %s", str(d)) try: # drop node info db = self.get_node_database(node_name) q = "select * from pgq_node.drop_node(%s, %s)" self.exec_cmd(db, q, [self.queue_name, node_name]) except skytools.DBError as d: self.log.warning("Local drop failure: %s", str(d)) # brute force removal for n in self.queue_info.member_map.values(): try: q = "select * from pgq_node.drop_node(%s, %s)" self.node_cmd(n.name, q, [self.queue_name, node_name]) except skytools.DBError as d: self.log.warning("Failed to remove from '%s': %s", n.name, str(d)) def node_depends(self, sub_node, top_node): cur_node = sub_node # walk upstream while 1: info = self.get_node_info(cur_node) if cur_node == top_node: # yes, top_node is sub_node's provider return True if info.type == 'root': # found root, no dependancy return False # step upwards cur_node = info.provider_node def demote_node(self, oldnode, step, newnode): """Downgrade old root?""" q = "select * from 
pgq_node.demote_root(%s, %s, %s)" res = self.node_cmd(oldnode, q, [self.queue_name, step, newnode]) if res: return res[0]['last_tick'] def promote_branch(self, node): """Promote old branch as root.""" q = "select * from pgq_node.promote_branch(%s)" self.node_cmd(node, q, [self.queue_name]) def wait_for_catchup(self, new, last_tick): """Wait until new_node catches up to old_node.""" # wait for it on subscriber info = self.load_node_info(new) if info.completed_tick >= last_tick: self.log.info('tick already exists') return info if info.paused: self.log.info('new node seems paused, resuming') self.resume_node(new) while 1: self.log.debug('waiting for catchup: need=%d, cur=%d', last_tick, info.completed_tick) time.sleep(1) info = self.load_node_info(new) if info.completed_tick >= last_tick: return info def takeover_root(self, old_node_name, new_node_name, failover=False): """Root switchover.""" new_info = self.get_node_info(new_node_name) old_info = None if self.node_alive(old_node_name): # old root works, switch properly old_info = self.get_node_info(old_node_name) self.pause_node(old_node_name) self.demote_node(old_node_name, 1, new_node_name) last_tick = self.demote_node(old_node_name, 2, new_node_name) self.wait_for_catchup(new_node_name, last_tick) else: # find latest tick on local node q = "select * from pgq.get_queue_info(%s)" db = self.get_node_database(new_node_name) curs = db.cursor() curs.execute(q, [self.queue_name]) row = curs.fetchone() last_tick = row['last_tick_id'] db.commit() # find if any other node has more ticks other_node = None other_tick = last_tick sublist = self.find_subscribers_for(old_node_name) for n in sublist: q = "select * from pgq_node.get_node_info(%s)" rows = self.node_cmd(n, q, [self.queue_name]) info = rows[0] if info['worker_last_tick'] > other_tick: other_tick = info['worker_last_tick'] other_node = n # if yes, load batches from there if other_node: self.change_provider(new_node_name, new_provider=other_node) 
def cmd_takeover(self, old_node_name):
    """Generic node switchover.

    Makes the new node (``options.node``, or the local node when
    ``options.consumer`` names the local worker) take over the role of
    ``old_node_name``.  With ``options.dead_root`` / ``options.dead_branch``
    the old node is tagged dead first and a failover is performed.

    Raises UsageError when the old or new node cannot be determined
    or is not usable.
    """
    self.log.info("old: %s", old_node_name)
    self.load_local_info()
    new_node_name = self.options.node
    if not new_node_name:
        # fall back to local node, but only if we are its worker
        worker = self.options.consumer
        if not worker:
            raise UsageError('new node not given')
        if self.queue_info.local_node.worker_name != worker:
            raise UsageError('consumer %s is not the worker of local node' % worker)
        new_node_name = self.local_node
    if not old_node_name:
        raise UsageError('old node not given')

    if old_node_name not in self.queue_info.member_map:
        raise UsageError('Unknown node: %s' % old_node_name)

    if self.options.dead_root:
        otype = 'root'
        failover = True
    elif self.options.dead_branch:
        otype = 'branch'
        failover = True
    else:
        onode = self.get_node_info(old_node_name)
        if not onode:
            # get_node_info() gives None for nodes tagged dead
            raise UsageError('Old node %s is dead, failover must be requested explicitly'
                             % old_node_name)
        otype = onode.type
        failover = False

    if failover:
        self.cmd_tag_dead(old_node_name)

    new_node = self.get_node_info(new_node_name)
    if not new_node:
        raise UsageError('New node %s is dead' % new_node_name)
    if old_node_name == new_node.name:
        self.log.info("same node?")
        return

    if otype == 'root':
        self.takeover_root(old_node_name, new_node_name, failover)
    else:
        self.takeover_nonroot(old_node_name, new_node_name, failover)

    # switch subscribers around
    if self.options.all or failover:
        for n in self.find_subscribers_for(old_node_name):
            if n != new_node_name:
                self.node_change_provider(n, new_node_name)
= str(d).strip().split('\n', 1)[0] print('Node %s failure: %s' % (node_name, msg)) self.close_node_database(node_name) def cmd_pause(self): """Pause a node""" self.load_local_info() node, consumer = self.find_consumer() self.pause_consumer(node, consumer) def cmd_resume(self): """Resume a node from pause.""" self.load_local_info() node, consumer = self.find_consumer() self.resume_consumer(node, consumer) def cmd_members(self): """Show member list.""" self.load_local_info() db = self.get_database(self.initial_db_name) desc = 'Member info on %s@%s:' % (self.local_node, self.queue_name) q = "select node_name, dead, node_location"\ " from pgq_node.get_queue_locations(%s) order by 1" self.display_table(db, desc, q, [self.queue_name]) def cmd_node_info(self): self.load_local_info() q = self.queue_info n = q.local_node m = q.get_member(n.name) stlist = [] if m.dead: stlist.append('DEAD') if n.paused: stlist.append("PAUSED") if not n.uptodate: stlist.append("NON-UP-TO-DATE") st = ', '.join(stlist) if not st: st = 'OK' print('Node: %s Type: %s Queue: %s' % (n.name, n.type, q.queue_name)) print('Status: %s' % st) if n.type != 'root': print('Provider: %s' % n.provider_node) else: print('Provider: --') print('Connect strings:') print(' Local : %s' % self.cf.get('db')) print(' Public : %s' % m.location) if n.type != 'root': print(' Provider: %s' % n.provider_location) if n.combined_queue: print('Combined Queue: %s (node type: %s)' % (n.combined_queue, n.combined_type)) def cmd_wait_root(self): """Wait for next tick from root.""" self.load_local_info() if self.queue_info.local_node.type == 'root': self.log.info("Current node is root, no need to wait") return self.log.info("Finding root node") root_node = self.find_root_node() self.log.info("Root is %s", root_node) dst_db = self.get_database(self.initial_db_name) self.wait_for_node(dst_db, root_node) def cmd_wait_provider(self): """Wait for next tick from provider.""" self.load_local_info() if self.queue_info.local_node.type == 
def cmd_resurrect(self):
    """Convert out-of-sync old root to branch and sync queue contents.

    Steps:

    1. Search the other nodes for the failover consumer ("gravestone")
       left for this node and for the new root node.
    2. Demote (if still root) and pause the local worker.
    3. Drop old subscribers and non-internal consumers.
    4. Dump and delete events that were never seen by the rest of the
       cascade (see resurrect_process_lost_events()).
    5. Re-subscribe the local node at the gravestone position and
       import the global watermark from the provider.

    Exits the process with status 1 when the dump file already exists,
    or when the new root / gravestone cannot be found.
    """
    self.load_local_info()

    db = self.get_database(self.initial_db_name)
    curs = db.cursor()

    # stop if leaf
    if self.queue_info.local_node.type == 'leaf':
        self.log.info("Current node is leaf, nothing to do")
        return

    # stop if dump file exists
    if os.path.lexists(RESURRECT_DUMP_FILE):
        self.log.error("Dump file exists, cannot perform resurrection: %s", RESURRECT_DUMP_FILE)
        sys.exit(1)

    #
    # Find failover position
    #

    self.log.info("** Searching for gravestone **")

    # load subscribers
    q = "select * from pgq_node.get_subscriber_info(%s)"
    curs.execute(q, [self.queue_name])
    sub_list = [row['node_name'] for row in curs.fetchall()]
    db.commit()

    # find backup subscription
    this_node = self.queue_info.local_node.name
    failover_cons = self.failover_consumer_name(this_node)
    # keys() is a view on Python 3 and cannot be added to a list
    full_list = list(self.queue_info.member_map.keys())
    done_nodes = {this_node: 1}
    prov_node = None
    root_node = None
    failover_tick = None
    for node_name in sub_list + full_list:
        if node_name in done_nodes:
            continue
        done_nodes[node_name] = 1
        if not self.node_alive(node_name):
            self.log.info('Node %s is dead, skipping', node_name)
            continue
        self.log.info('Looking on node %s', node_name)
        node_db = None
        try:
            node_db = self.get_node_database(node_name)
            node_curs = node_db.cursor()
            node_curs.execute("select * from pgq.get_consumer_info(%s, %s)", [self.queue_name, failover_cons])
            cons_rows = node_curs.fetchall()
            node_curs.execute("select * from pgq_node.get_node_info(%s)", [self.queue_name])
            node_info = node_curs.fetchone()
            node_db.commit()
            if len(cons_rows) == 1:
                if prov_node:
                    raise Exception('Unexpected situation: there are two gravestones'
                                    ' - on nodes %s and %s' % (prov_node, node_name))
                prov_node = node_name
                failover_tick = cons_rows[0]['last_tick']
                self.log.info("Found gravestone on node: %s", node_name)
            if node_info['node_type'] == 'root':
                self.log.info("Found new root node: %s", node_name)
                root_node = node_name
            self.close_node_database(node_name)
            node_db = None
            if root_node and prov_node:
                break
        except skytools.DBError:
            self.log.warning("failed to check node %s", node_name)
            if node_db:
                self.close_node_database(node_name)
                node_db = None

    if not root_node:
        self.log.error("Cannot find new root node")
        sys.exit(1)
    if not prov_node:
        self.log.error("Cannot find failover position (%s)", failover_cons)
        sys.exit(1)

    # load worker state
    q = "select * from pgq_node.get_worker_state(%s)"
    rows = self.exec_cmd(db, q, [self.queue_name])
    state = rows[0]

    # demote & pause
    self.log.info("** Demote & pause local node **")
    if self.queue_info.local_node.type == 'root':
        self.log.info('Node %s is root, demoting', this_node)
        q = "select * from pgq_node.demote_root(%s, %s, %s)"
        self.exec_cmd(db, q, [self.queue_name, 1, prov_node])
        self.exec_cmd(db, q, [self.queue_name, 2, prov_node])

        # change node type and set worker paused in same TX
        curs = db.cursor()
        self.exec_cmd(curs, q, [self.queue_name, 3, prov_node])
        q = "select * from pgq_node.set_consumer_paused(%s, %s, true)"
        self.exec_cmd(curs, q, [self.queue_name, state['worker_name']])
        db.commit()
    elif not state['paused']:
        # pause worker, don't wait for reaction, as it may be dead
        self.log.info('Node %s is branch, pausing worker: %s', this_node, state['worker_name'])
        q = "select * from pgq_node.set_consumer_paused(%s, %s, true)"
        self.exec_cmd(db, q, [self.queue_name, state['worker_name']])
    else:
        self.log.info('Node %s is branch and worker is paused', this_node)

    #
    # Drop old consumers and subscribers
    #
    self.log.info("** Dropping old subscribers and consumers **")

    # unregister subscriber nodes
    q = "select pgq_node.unregister_subscriber(%s, %s)"
    for node_name in sub_list:
        self.log.info("Dropping old subscriber node: %s", node_name)
        curs.execute(q, [self.queue_name, node_name])

    # unregister consumers
    q = "select consumer_name from pgq.get_consumer_info(%s)"
    curs.execute(q, [self.queue_name])
    # The statement was previously sent without "select", which is
    # not valid SQL.
    unreg_q = "select pgq.unregister_consumer(%s, %s)"
    for row in curs.fetchall():
        cname = row['consumer_name']
        if cname[0] == '.':
            # names starting with dot are internal consumers
            self.log.info("Keeping consumer: %s", cname)
            continue
        self.log.info("Dropping old consumer: %s", cname)
        curs.execute(unreg_q, [self.queue_name, cname])
    db.commit()

    # dump events
    self.log.info("** Dump & delete lost events **")
    stats = self.resurrect_process_lost_events(db, failover_tick)

    self.log.info("** Subscribing %s to %s **", this_node, prov_node)

    # set local position
    self.log.info("Reset local completed pos")
    q = "select * from pgq_node.set_consumer_completed(%s, %s, %s)"
    self.exec_cmd(db, q, [self.queue_name, state['worker_name'], failover_tick])

    # rename gravestone
    self.log.info("Rename gravestone to worker: %s", state['worker_name'])
    prov_db = self.get_node_database(prov_node)
    prov_curs = prov_db.cursor()
    q = "select * from pgq_node.unregister_subscriber(%s, %s)"
    self.exec_cmd(prov_curs, q, [self.queue_name, this_node], quiet=True)
    q = "select ret_code, ret_note, global_watermark"\
        " from pgq_node.register_subscriber(%s, %s, %s, %s)"
    res = self.exec_cmd(prov_curs, q, [self.queue_name, this_node, state['worker_name'], failover_tick], quiet=True)
    global_wm = res[0]['global_watermark']
    prov_db.commit()

    # import new global watermark
    self.log.info("Reset global watermark")
    q = "select * from pgq_node.set_global_watermark(%s, %s)"
    self.exec_cmd(db, q, [self.queue_name, global_wm], quiet=True)

    # show stats
    if stats:
        self.log.info("** Statistics **")
        # sorted() works on both Python 2 lists and Python 3 key views
        for k in sorted(stats):
            self.log.info(" %s: %s", k, stats[k])
    self.log.info("** Resurrection done, worker paused **")
# # find snapshots q = "select t1.tick_snapshot as s1, t2.tick_snapshot as s2"\ " from pgq.tick t1, pgq.tick t2"\ " where t1.tick_id = %s"\ " and t2.tick_id = %s" curs.execute(q, [failover_tick, final_tick_id]) ticks = curs.fetchone() s1 = skytools.Snapshot(ticks['s1']) s2 = skytools.Snapshot(ticks['s2']) xlist = [] for tx in s1.txid_list: if s2.contains(tx): xlist.append(str(tx)) # create where clauses where1 = None if len(xlist) > 0: where1 = "ev_txid in (%s)" % (",".join(xlist),) where2 = ("ev_txid >= %d AND ev_txid <= %d" # noqa " and not txid_visible_in_snapshot(ev_txid, '%s')" " and txid_visible_in_snapshot(ev_txid, '%s')" % ( s1.xmax, s2.xmax, ticks['s1'], ticks['s2'])) # loop over all queue data tables q = "select * from pgq.queue where queue_name = %s" curs.execute(q, [self.queue_name]) row = curs.fetchone() ntables = row['queue_ntables'] tbl_pfx = row['queue_data_pfx'] schema, table = tbl_pfx.split('.') total_del_count = 0 self.log.info("Deleting lost events") for i in range(ntables): del_count = 0 self.log.debug("Deleting events from table %d", i) qtbl = "%s.%s" % (skytools.quote_ident(schema), skytools.quote_ident(table + '_' + str(i))) q = "delete from " + qtbl + " where " if where1: self.log.debug(q + where1) curs.execute(q + where1) if curs.rowcount and curs.rowcount > 0: del_count += curs.rowcount self.log.debug(q + where2) curs.execute(q + where2) if curs.rowcount and curs.rowcount > 0: del_count += curs.rowcount total_del_count += del_count self.log.debug('%d events deleted', del_count) self.log.info('%d events deleted', total_del_count) stats['deleted_count'] = total_del_count # delete new ticks q = "delete from pgq.tick t using pgq.queue q"\ " where q.queue_name = %s"\ " and t.tick_queue = q.queue_id"\ " and t.tick_id > %s"\ " and t.tick_id <= %s" curs.execute(q, [self.queue_name, failover_tick, final_tick_id]) self.log.info("%s ticks deleted", curs.rowcount) db.commit() return stats _json_dump_file = None def resurrect_dump_event(self, ev, 
stats, batch_info): if self._json_dump_file is None: self._json_dump_file = open(RESURRECT_DUMP_FILE, 'w') sep = '[' else: sep = ',' # create ordinary dict to avoid problems with row class and datetime d = { 'ev_id': ev.ev_id, 'ev_type': ev.ev_type, 'ev_data': ev.ev_data, 'ev_extra1': ev.ev_extra1, 'ev_extra2': ev.ev_extra2, 'ev_extra3': ev.ev_extra3, 'ev_extra4': ev.ev_extra4, 'ev_time': ev.ev_time.isoformat(), 'ev_txid': ev.ev_txid, 'ev_retry': ev.ev_retry, 'tick_id': batch_info['cur_tick_id'], 'prev_tick_id': batch_info['prev_tick_id'], } jsev = skytools.json_encode(d) s = sep + '\n' + jsev self._json_dump_file.write(s) def resurrect_dump_finish(self): if self._json_dump_file: self._json_dump_file.write('\n]\n') self._json_dump_file.close() self._json_dump_file = None def failover_consumer_name(self, node_name): return node_name + ".gravestone" # # Shortcuts for operating on nodes. # def load_local_info(self): """fetch set info from local node.""" db = self.get_database(self.initial_db_name) self.queue_info = self.load_queue_info(db) self.local_node = self.queue_info.local_node.name def get_node_database(self, node_name): """Connect to node.""" if node_name == self.queue_info.local_node.name: db = self.get_database(self.initial_db_name) else: m = self.queue_info.get_member(node_name) if not m: self.log.error("get_node_database: cannot resolve %s", node_name) sys.exit(1) #self.log.info("%s: dead=%s", m.name, m.dead) if m.dead: return None loc = m.location db = self.get_database('node.' + node_name, connstr=loc, profile='remote') return db def node_alive(self, node_name): m = self.queue_info.get_member(node_name) if not m: res = False elif m.dead: res = False else: res = True #self.log.warning('node_alive(%s) = %s', node_name, res) return res def close_node_database(self, node_name): """Disconnect node's connection.""" if node_name == self.queue_info.local_node.name: self.close_database(self.initial_db_name) else: self.close_database("node." 
def set_paused(self, node, consumer, pause_flag):
    """Set node pause flag and wait for confirmation.

    Sets the paused flag for ``consumer`` on ``node`` and then polls
    the consumer state once per second until the worker reports
    itself up to date.

    Raises Exception if the consumer state cannot be read (e.g. the
    node is tagged dead, in which case node_cmd() returns nothing) or
    if the paused flag is changed by somebody else while waiting.
    """
    q = "select * from pgq_node.set_consumer_paused(%s, %s, %s)"
    self.node_cmd(node, q, [self.queue_name, consumer, pause_flag])

    self.log.info('Waiting for worker to accept')
    q = "select * from pgq_node.get_consumer_state(%s, %s)"
    while True:
        rows = self.node_cmd(node, q, [self.queue_name, consumer], quiet=True)
        if not rows:
            # previously this crashed with TypeError on rows[0]
            raise Exception("cannot read state of consumer '%s' on node '%s'" % (consumer, node))
        stat = rows[0]
        if stat['paused'] != pause_flag:
            raise Exception('operation canceled? %s <> %s' % (repr(stat['paused']), repr(pause_flag)))

        if stat['uptodate']:
            op = "paused" if pause_flag else "resumed"
            self.log.info("Consumer '%s' on node '%s' %s", consumer, node, op)
            return
        time.sleep(1)
# Class-level default, kept so existing references to the attribute
# still resolve.  get_node_info() stores results in a per-instance
# dict that shadows it, so separate instances no longer share entries.
_node_cache = {}

def get_node_info(self, node_name):
    """Cached node info lookup.

    Returns whatever load_node_info() returned the first time the
    node was looked up on this instance (this includes None for
    nodes that were dead at that time).
    """
    cache = vars(self).get('_node_cache')
    if cache is None:
        # first use on this instance: create private cache
        cache = {}
        self._node_cache = cache
    if node_name in cache:
        return cache[node_name]
    inf = self.load_node_info(node_name)
    cache[node_name] = inf
    return inf
def register_consumer(self, provider_loc=None):
    """Register consumer on source node first, then target node.

    ``provider_loc`` is the connect string of the provider node; when
    not given, the --provider option is used.  Exits with status 1 if
    neither is available.

    Raises Exception if the provider node is not initialized or if the
    consumer position cannot be read back after registration.
    """

    if not provider_loc:
        provider_loc = self.options.provider
    if not provider_loc:
        self.log.error('Please give provider location with --provider=')
        sys.exit(1)

    dst_db = self.get_database(self.target_db)
    src_db = self.get_database(PDB, connstr=provider_loc, profile='remote')
    src_curs = src_db.cursor()

    # check target info
    q = "select * from pgq_node.get_node_info(%s)"
    res = self.exec_cmd(src_db, q, [self.queue_name])
    pnode = res[0]['node_name']
    if not pnode:
        raise Exception('parent node not initialized?')

    # source queue
    super(CascadedConsumer, self).register_consumer()

    # fetch pos
    q = "select last_tick from pgq.get_consumer_info(%s, %s)"
    src_curs.execute(q, [self.queue_name, self.consumer_name])
    row = src_curs.fetchone()
    # fetchone() gives None when there is no row; treat it the same
    # as an empty position instead of failing on None['last_tick']
    last_tick = row['last_tick'] if row else None
    if not last_tick:
        raise Exception('registration failed?')
    src_db.commit()

    # target node
    q = "select * from pgq_node.register_consumer(%s, %s, %s, %s)"
    self.exec_cmd(dst_db, q, [self.queue_name, self.consumer_name, pnode, last_tick])
src_db.cursor() # fetch last tick from source q = "select last_tick from pgq.get_consumer_info(%s, %s)" src_curs.execute(q, [self.queue_name, self.consumer_name]) row = src_curs.fetchone() src_db.commit() # on root node we dont have consumer info if not row: self.log.info("No info about consumer, cannot reset") return # set on destination last_tick = row['last_tick'] q = "select * from pgq_node.set_consumer_completed(%s, %s, %s)" dst_curs.execute(q, [self.queue_name, self.consumer_name, last_tick]) dst_db.commit() def process_batch(self, src_db, batch_id, event_list): state = self._consumer_state dst_db = self.get_database(self.target_db) if self.is_batch_done(state, self.batch_info, dst_db): return tick_id = self.batch_info['tick_id'] self.process_remote_batch(src_db, tick_id, event_list, dst_db) # this also commits self.finish_remote_batch(src_db, dst_db, tick_id) def process_root_node(self, dst_db): """This is called on root node, where no processing should happen. """ # extra sleep time.sleep(10 * self.loop_delay) self.log.info('{standby: 1}') def work(self): """Refresh state before calling Consumer.work().""" dst_db = self.get_database(self.target_db) self._consumer_state = self.refresh_state(dst_db) if self._consumer_state['node_type'] == 'root': self.process_root_node(dst_db) return if not self.provider_connstr: raise Exception('provider_connstr not set') self.get_provider_db(self._consumer_state) return super(CascadedConsumer, self).work() def refresh_state(self, dst_db, full_logic=True): """Fetch consumer state from target node. This also sleeps if pause is set and updates "uptodate" flag to notify that data is refreshed. 
""" while 1: q = "select * from pgq_node.get_consumer_state(%s, %s)" rows = self.exec_cmd(dst_db, q, [self.queue_name, self.consumer_name]) state = rows[0] # tag refreshed if not state['uptodate'] and full_logic: q = "select * from pgq_node.set_consumer_uptodate(%s, %s, true)" self.exec_cmd(dst_db, q, [self.queue_name, self.consumer_name]) if state['cur_error'] and self.work_state != -1: q = "select * from pgq_node.set_consumer_error(%s, %s, NULL)" self.exec_cmd(dst_db, q, [self.queue_name, self.consumer_name]) if not state['paused'] or not full_logic: break time.sleep(self.loop_delay) # update connection loc = state['provider_location'] if self.provider_connstr != loc: self.close_database(PDB) self.provider_connstr = loc # re-initialize provider connection self.get_provider_db(state) return state def is_batch_done(self, state, batch_info, dst_db): cur_tick = batch_info['tick_id'] prev_tick = batch_info['prev_tick_id'] dst_tick = state['completed_tick'] if not dst_tick: raise Exception('dst_tick NULL?') if prev_tick == dst_tick: # on track return False if cur_tick == dst_tick: # current batch is already applied, skip it return True # anything else means problems raise Exception('Lost position: batch %s..%s, dst has %s' % ( prev_tick, cur_tick, dst_tick)) def process_remote_batch(self, src_db, tick_id, event_list, dst_db): """Per-batch callback. By default just calls process_remote_event() in loop.""" src_curs = src_db.cursor() dst_curs = dst_db.cursor() for ev in event_list: self.process_remote_event(src_curs, dst_curs, ev) def process_remote_event(self, src_curs, dst_curs, ev): """Per-event callback. By default ignores cascading events and gives error on others. Can be called from user handler to finish unprocessed events. """ if ev.ev_type[:4] == "pgq.": # ignore cascading events pass else: raise Exception('Unhandled event type in queue: %s' % ev.ev_type) def finish_remote_batch(self, src_db, dst_db, tick_id): """Called after event processing. 
class NodeInfo(object):
    """Detailed info about set node.

    Built from a row of pgq_node.get_node_info().  When the row is
    empty the node is treated as dead and only name and type are set.
    """

    name = None
    type = None
    global_watermark = None
    local_watermark = None
    completed_tick = None
    provider_node = None
    provider_location = None
    consumer_name = None  # ?
    worker_name = None  # ?
    paused = False
    uptodate = True
    combined_queue = None
    combined_type = None
    last_tick = None
    node_attrs = {}

    def __init__(self, queue_name, row, main_worker=True, node_name=None):
        """Initialize from node info row.

        ``node_name`` is used as name only when ``row`` is empty.
        """
        self.queue_name = queue_name
        self.member_map = {}
        self.main_worker = main_worker

        self.parent = None
        self.consumer_map = {}
        self.queue_info = {}
        self._info_lines = []
        self.cascaded_consumer_map = {}
        # per-instance dict, so dead nodes do not share the class-level one
        self.node_attrs = {}

        self._row = row

        if not row:
            self.name = node_name
            self.type = 'dead'
            return

        self.name = row['node_name']
        self.type = row['node_type']
        self.global_watermark = row['global_watermark']
        self.local_watermark = row['local_watermark']
        self.completed_tick = row['worker_last_tick']
        self.provider_node = row['provider_node']
        self.provider_location = row['provider_location']
        self.consumer_name = row['worker_name']
        self.worker_name = row['worker_name']
        self.paused = row['worker_paused']
        self.uptodate = row['worker_uptodate']
        self.combined_queue = row['combined_queue']
        self.combined_type = row['combined_type']
        self.last_tick = row['worker_last_tick']

        if 'node_attrs' in row:
            a = row['node_attrs']
            if a:
                self.node_attrs = skytools.db_urldecode(a)

    def __get_target_queue(self):
        """Return name of the queue this node writes to, None for plain leaf."""
        qname = None
        if self.type == LEAF:
            if self.combined_queue:
                qname = self.combined_queue
            else:
                return None
        else:
            qname = self.queue_name
        if qname is None:
            raise Exception("no target queue")
        return qname

    def get_title(self):
        """Return short 'name (type)' title."""
        return "%s (%s)" % (self.name, self.type)

    def get_infolines(self):
        """Return status lines for display.

        The result contains lines added with add_info_line(), followed
        by lag/tick/pause status, node attributes and first lines of
        consumer errors.  A new list is returned on each call, so
        calling this repeatedly does not accumulate duplicate lines.
        """
        lst = list(self._info_lines)

        lag = None
        if self.parent:
            root = self.parent
            while root.parent:
                root = root.parent
            cinfo = self.parent.consumer_map.get(self.consumer_name)
            if cinfo and root.queue_info:
                tick_time = cinfo['tick_time']
                root_time = root.queue_info['now']
                if root_time < tick_time:
                    # ignore negative lag - probably due to info gathering
                    # taking long time
                    lag = datetime.timedelta(0)
                else:
                    lag = root_time - tick_time
        elif self.queue_info:
            lag = self.queue_info['ticker_lag']

        # timedelta(0) is falsy, so test for None explicitly;
        # otherwise zero lag would be shown as "(n/a)"
        if lag is not None:
            lag_txt = ival2str(lag)
        else:
            lag_txt = "(n/a)"
        txt = "Lag: %s" % lag_txt
        if self.last_tick:
            txt += ", Tick: %s" % self.last_tick
        if self.paused:
            txt += ", PAUSED"
        if not self.uptodate:
            txt += ", NOT UPTODATE"
        lst.append(txt)

        for k, v in self.node_attrs.items():
            lst.append("Attr: %s=%s" % (k, v))

        for cname, row in self.cascaded_consumer_map.items():
            err = row['cur_error']
            if err:
                # show only first line
                pos = err.find('\n')
                if pos > 0:
                    err = err[:pos]
                lst.append("ERR: %s: %s" % (cname, err))
        return lst

    def add_info_line(self, ln):
        """Add extra line to be shown by get_infolines()."""
        self._info_lines.append(ln)

    def load_status(self, curs):
        """Load consumer and queue status via given cursor.

        Fills consumer_map, queue_info and cascaded_consumer_map;
        all stay empty when the node has no queue name.
        """
        self.consumer_map = {}
        self.queue_info = {}
        self.cascaded_consumer_map = {}
        if self.queue_name:
            q = "select consumer_name, current_timestamp - lag as tick_time,"\
                " lag, last_seen, last_tick "\
                "from pgq.get_consumer_info(%s)"
            curs.execute(q, [self.queue_name])
            for row in curs.fetchall():
                cname = row['consumer_name']
                self.consumer_map[cname] = row

            q = "select current_timestamp - ticker_lag as tick_time,"\
                " ticker_lag, current_timestamp as now "\
                "from pgq.get_queue_info(%s)"
            curs.execute(q, [self.queue_name])
            self.queue_info = curs.fetchone()

            q = "select * from pgq_node.get_consumer_info(%s)"
            curs.execute(q, [self.queue_name])
            for row in curs.fetchall():
                cname = row['consumer_name']
                self.cascaded_consumer_map[cname] = row
""" def __init__(self, queue_name, info_row, member_rows): self.local_node = NodeInfo(queue_name, info_row) self.queue_name = queue_name self.member_map = {} self.node_map = {} self.add_node(self.local_node) for r in member_rows: m = MemberInfo(r) self._add_member(m) def _add_member(self, member): self.member_map[member.name] = member def get_member(self, name): return self.member_map.get(name) def get_node(self, name): return self.node_map.get(name) def add_node(self, node): self.node_map[node.name] = node def tag_dead(self, node_name): if node_name in self.node_map: self.member_map[node_name].dead = True else: row = {'node_name': node_name, 'node_location': None, 'dead': True} m = MemberInfo(row) self.member_map[node_name] = m # # Rest is about printing the tree # _DATAFMT = "%-30s%s" def print_tree(self): """Print ascii-tree for set. Expects that data for all nodes is filled in.""" print('Queue: %s Local node: %s' % (self.queue_name, self.local_node.name)) print('') root_list = self._prepare_tree() for root in root_list: self._tree_calc(root) datalines = self._print_node(root, '', []) for ln in datalines: print(self._DATAFMT % (' ', ln)) def _print_node(self, node, pfx, datalines): # print a tree fragment for node and info # returns list of unprinted data rows for ln in datalines: print(self._DATAFMT % (_setpfx(pfx, '|'), ln)) datalines = node.get_infolines() print("%s%s" % (_setpfx(pfx, '+--: '), node.get_title())) for i, n in enumerate(node.child_list): sfx = ((i < len(node.child_list) - 1) and ' |' or ' ') datalines = self._print_node(n, pfx + sfx, datalines) return datalines def _prepare_tree(self): # reset vars, fill parent and child_list for each node # returns list of root nodes (mostly 1) for node in self.node_map.values(): node.total_childs = 0 node.levels = 0 node.child_list = [] node.parent = None root_list = [] for node in self.node_map.values(): if node.provider_node \ and node.provider_node != node.name \ and node.provider_node in self.node_map: p 
= self.node_map[node.provider_node] p.child_list.append(node) node.parent = p else: node.parent = None root_list.append(node) return root_list def _tree_calc(self, node): # calculate levels and count total childs # sort the tree based on them total = len(node.child_list) levels = 1 for subnode in node.child_list: self._tree_calc(subnode) total += subnode.total_childs if levels < subnode.levels + 1: levels = subnode.levels + 1 node.total_childs = total node.levels = levels node.child_list.sort(key=_node_key) def _setpfx(pfx, sfx): if pfx: pfx = pfx[:-1] + sfx return pfx def _node_key(n): return (n.levels, n.total_childs, n.name) python-pgq/pgq/cascade/worker.py000066400000000000000000000377331302126165200172340ustar00rootroot00000000000000"""Cascaded worker. CascadedConsumer that also maintains node. """ from __future__ import division, absolute_import, print_function import sys import time import skytools from pgq.cascade.consumer import CascadedConsumer from pgq.producer import bulk_insert_events from pgq.event import Event __all__ = ['CascadedWorker'] class WorkerState(object): """Depending on node state decides on actions worker needs to do.""" # node_type, # node_name, provider_node, # global_watermark, local_watermark # combined_queue, combined_type process_batch = 0 # handled in CascadedConsumer copy_events = 0 # ok global_wm_event = 0 # ok local_wm_publish = 1 # ok process_events = 0 # ok send_tick_event = 0 # ok wait_behind = 0 # ok process_tick_event = 0 # ok target_queue = '' # ok keep_event_ids = 0 # ok create_tick = 0 # ok filtered_copy = 0 # ok process_global_wm = 0 # ok sync_watermark = 0 # ? 
wm_sync_nodes = [] def __init__(self, queue_name, nst): self.node_type = nst['node_type'] self.node_name = nst['node_name'] self.local_watermark = nst['local_watermark'] self.global_watermark = nst['global_watermark'] self.node_attrs = {} attrs = nst.get('node_attrs', '') if attrs: self.node_attrs = skytools.db_urldecode(attrs) ntype = nst['node_type'] ctype = nst['combined_type'] if ntype == 'root': self.global_wm_event = 1 self.local_wm_publish = 0 elif ntype == 'branch': self.target_queue = queue_name self.process_batch = 1 self.process_events = 1 self.copy_events = 1 self.process_tick_event = 1 self.keep_event_ids = 1 self.create_tick = 1 if 'sync_watermark' in self.node_attrs: slist = self.node_attrs['sync_watermark'] self.sync_watermark = 1 self.wm_sync_nodes = slist.split(',') else: self.process_global_wm = 1 elif ntype == 'leaf' and not ctype: self.process_batch = 1 self.process_events = 1 elif ntype == 'leaf' and ctype: self.target_queue = nst['combined_queue'] if ctype == 'root': self.process_batch = 1 self.process_events = 1 self.copy_events = 1 self.filtered_copy = 1 self.send_tick_event = 1 elif ctype == 'branch': self.process_batch = 1 self.wait_behind = 1 else: raise Exception('invalid state 1') else: raise Exception('invalid state 2') if ctype and ntype != 'leaf': raise Exception('invalid state 3') class CascadedWorker(CascadedConsumer): """CascadedWorker base class. Config fragment:: ## Parameters for pgq.CascadedWorker ## # how often the root node should push wm downstream (seconds) #global_wm_publish_period = 300 # how often the nodes should report their wm upstream (seconds) #local_wm_publish_period = 300 """ global_wm_publish_time = 0 global_wm_publish_period = 5 * 60 local_wm_publish_time = 0 local_wm_publish_period = 5 * 60 max_evbuf = 500 cur_event_seq = 0 cur_max_id = 0 seq_buffer = 10000 main_worker = True _worker_state = None ev_buf = [] real_global_wm = None def __init__(self, service_name, db_name, args): """Initialize new consumer. 
@param service_name: service_name for DBScript @param db_name: target database name for get_database() @param args: cmdline args for DBScript """ super(CascadedWorker, self).__init__(service_name, db_name, args) def reload(self): super(CascadedWorker, self).reload() self.global_wm_publish_period = self.cf.getfloat('global_wm_publish_period', CascadedWorker.global_wm_publish_period) self.local_wm_publish_period = self.cf.getfloat('local_wm_publish_period', CascadedWorker.local_wm_publish_period) def process_remote_batch(self, src_db, tick_id, event_list, dst_db): """Worker-specific event processing.""" self.ev_buf = [] max_id = 0 st = self._worker_state if st.wait_behind: self.wait_for_tick(dst_db, tick_id) src_curs = src_db.cursor() dst_curs = dst_db.cursor() for ev in event_list: if st.copy_events: self.copy_event(dst_curs, ev, st.filtered_copy) if ev.ev_type.split('.', 1)[0] in ("pgq", "londiste"): # process cascade events even on waiting leaf node self.process_remote_event(src_curs, dst_curs, ev) else: if st.process_events: self.process_remote_event(src_curs, dst_curs, ev) if ev.ev_id > max_id: max_id = ev.ev_id if max_id > self.cur_max_id: self.cur_max_id = max_id def wait_for_tick(self, dst_db, tick_id): """On combined-branch leaf needs to wait from tick to appear from combined-root. 
""" while 1: cst = self._consumer_state if cst['completed_tick'] >= tick_id: return self.sleep(10 * self.loop_delay) self._consumer_state = self.refresh_state(dst_db) if not self.looping: sys.exit(0) def is_batch_done(self, state, batch_info, dst_db): wst = self._worker_state # on combined-branch the target can get several batches ahead if wst.wait_behind: # let the wait-behind logic track ticks return False # check if events have processed done = super(CascadedWorker, self).is_batch_done(state, batch_info, dst_db) if not wst.create_tick: return done if not done: return False # check if tick is done - it happens in separate tx # fetch last tick from target queue q = "select t.tick_id from pgq.tick t, pgq.queue q"\ " where t.tick_queue = q.queue_id and q.queue_name = %s"\ " order by t.tick_queue desc, t.tick_id desc"\ " limit 1" curs = dst_db.cursor() curs.execute(q, [self.queue_name]) last_tick = curs.fetchone()['tick_id'] dst_db.commit() # insert tick if missing cur_tick = batch_info['tick_id'] if last_tick != cur_tick: prev_tick = batch_info['prev_tick_id'] tick_time = batch_info['batch_end'] if last_tick != prev_tick: raise Exception('is_batch_done: last branch tick = %d, expected %d or %d' % ( last_tick, prev_tick, cur_tick)) self.create_branch_tick(dst_db, cur_tick, tick_time) return True def publish_local_wm(self, src_db, dst_db): """Send local watermark to provider. 
""" t = time.time() if t - self.local_wm_publish_time < self.local_wm_publish_period: return st = self._worker_state wm = st.local_watermark if st.sync_watermark: # dont send local watermark upstream wm = self.batch_info['prev_tick_id'] elif wm > self.batch_info['cur_tick_id']: # in wait-behind-leaf case, the wm from target can be # ahead from source queue, use current batch then wm = self.batch_info['cur_tick_id'] self.log.debug("Publishing local watermark: %d", wm) src_curs = src_db.cursor() q = "select * from pgq_node.set_subscriber_watermark(%s, %s, %s)" src_curs.execute(q, [self.pgq_queue_name, st.node_name, wm]) src_db.commit() # if next part fails, dont repeat it immediately self.local_wm_publish_time = t if st.sync_watermark and self.real_global_wm is not None: # instead sync 'global-watermark' with specific nodes dst_curs = dst_db.cursor() nmap = self._get_node_map(dst_curs) dst_db.commit() # local lowest wm = st.local_watermark # the global-watermark in subtree can stay behind # upstream global-watermark, but must not go ahead if self.real_global_wm < wm: wm = self.real_global_wm for node in st.wm_sync_nodes: if node == st.node_name: continue if node not in nmap: # dont ignore missing nodes - cluster may be partially set up self.log.warning('Unknown node in sync_watermark list: %s', node) return n = nmap[node] if n['dead']: # ignore dead nodes continue wmdb = self.get_database('wmdb', connstr=n['node_location'], autocommit=1, profile='remote') wmcurs = wmdb.cursor() q = 'select local_watermark from pgq_node.get_node_info(%s)' wmcurs.execute(q, [self.queue_name]) row = wmcurs.fetchone() if not row: # partially set up node? 
self.log.warning('Node not working: %s', node) elif row['local_watermark'] < wm: # keep lowest wm wm = row['local_watermark'] self.close_database('wmdb') # now we have lowest wm, store it q = "select pgq_node.set_global_watermark(%s, %s)" dst_curs.execute(q, [self.queue_name, wm]) dst_db.commit() def _get_node_map(self, curs): q = "select node_name, node_location, dead from pgq_node.get_queue_locations(%s)" curs.execute(q, [self.queue_name]) res = {} for row in curs.fetchall(): res[row['node_name']] = row return res def process_remote_event(self, src_curs, dst_curs, ev): """Handle cascading events. """ if ev.retry: raise Exception('CascadedWorker must not get retry events') # non cascade events send to CascadedConsumer to error out if ev.ev_type[:4] != 'pgq.': super(CascadedWorker, self).process_remote_event(src_curs, dst_curs, ev) return # ignore cascade events if not main worker if not self.main_worker: return # check if for right queue t = ev.ev_type if ev.ev_extra1 != self.pgq_queue_name and t != "pgq.tick-id": raise Exception("bad event in queue: " + str(ev)) self.log.debug("got cascade event: %s(%s)", t, ev.ev_data) st = self._worker_state if t == "pgq.location-info": node = ev.ev_data loc = ev.ev_extra2 dead = ev.ev_extra3 q = "select * from pgq_node.register_location(%s, %s, %s, %s)" dst_curs.execute(q, [self.pgq_queue_name, node, loc, dead]) elif t == "pgq.unregister-location": node = ev.ev_data q = "select * from pgq_node.unregister_location(%s, %s)" dst_curs.execute(q, [self.pgq_queue_name, node]) elif t == "pgq.global-watermark": if st.sync_watermark: tick_id = int(ev.ev_data) self.log.debug('Half-ignoring global watermark %d', tick_id) self.real_global_wm = tick_id elif st.process_global_wm: tick_id = int(ev.ev_data) q = "select * from pgq_node.set_global_watermark(%s, %s)" dst_curs.execute(q, [self.pgq_queue_name, tick_id]) elif t == "pgq.tick-id": tick_id = int(ev.ev_data) if ev.ev_extra1 == self.pgq_queue_name: raise Exception('tick-id event for own 
queue?') if st.process_tick_event: q = "select * from pgq_node.set_partition_watermark(%s, %s, %s)" dst_curs.execute(q, [self.pgq_queue_name, ev.ev_extra1, tick_id]) else: raise Exception("unknown cascade event: %s" % t) def finish_remote_batch(self, src_db, dst_db, tick_id): """Worker-specific cleanup on target node. """ # merge-leaf on branch should not update tick pos st = self._worker_state if st.wait_behind: dst_db.commit() # still need to publish wm info if st.local_wm_publish and self.main_worker: self.publish_local_wm(src_db, dst_db) return if self.main_worker: dst_curs = dst_db.cursor() self.flush_events(dst_curs) # send tick event into queue if st.send_tick_event: q = "select pgq.insert_event(%s, 'pgq.tick-id', %s, %s, null, null, null)" dst_curs.execute(q, [st.target_queue, str(tick_id), self.pgq_queue_name]) super(CascadedWorker, self).finish_remote_batch(src_db, dst_db, tick_id) if self.main_worker: if st.create_tick: # create actual tick tick_id = self.batch_info['tick_id'] tick_time = self.batch_info['batch_end'] self.create_branch_tick(dst_db, tick_id, tick_time) if st.local_wm_publish: self.publish_local_wm(src_db, dst_db) def create_branch_tick(self, dst_db, tick_id, tick_time): q = "select pgq.ticker(%s, %s, %s, %s)" # execute it in autocommit mode ilev = dst_db.isolation_level dst_db.set_isolation_level(0) dst_curs = dst_db.cursor() dst_curs.execute(q, [self.pgq_queue_name, tick_id, tick_time, self.cur_max_id]) dst_db.set_isolation_level(ilev) def copy_event(self, dst_curs, ev, filtered_copy): """Add event to copy buffer. 
""" if not self.main_worker: return if filtered_copy: if ev.type[:4] == "pgq.": return if len(self.ev_buf) >= self.max_evbuf: self.flush_events(dst_curs) if ev.type == 'pgq.global-watermark': st = self._worker_state if st.sync_watermark: # replace payload with synced global watermark row = ev._event_row.copy() row['ev_data'] = str(st.global_watermark) ev = Event(self.queue_name, row) self.ev_buf.append(ev) def flush_events(self, dst_curs): """Send copy buffer to target queue. """ if len(self.ev_buf) == 0: return flds = ['ev_time', 'ev_type', 'ev_data', 'ev_extra1', 'ev_extra2', 'ev_extra3', 'ev_extra4'] st = self._worker_state if st.keep_event_ids: flds.append('ev_id') bulk_insert_events(dst_curs, self.ev_buf, flds, st.target_queue) self.ev_buf = [] def refresh_state(self, dst_db, full_logic=True): """Load also node state from target node. """ res = super(CascadedWorker, self).refresh_state(dst_db, full_logic) q = "select * from pgq_node.get_node_info(%s)" st = self.exec_cmd(dst_db, q, [self.pgq_queue_name]) self._worker_state = WorkerState(self.pgq_queue_name, st[0]) return res def process_root_node(self, dst_db): """On root node send global watermark downstream. """ super(CascadedWorker, self).process_root_node(dst_db) t = time.time() if t - self.global_wm_publish_time < self.global_wm_publish_period: return self.log.debug("Publishing global watermark") dst_curs = dst_db.cursor() q = "select * from pgq_node.set_global_watermark(%s, NULL)" dst_curs.execute(q, [self.pgq_queue_name]) dst_db.commit() self.global_wm_publish_time = t python-pgq/pgq/consumer.py000066400000000000000000000075031302126165200161630ustar00rootroot00000000000000 """PgQ consumer framework for Python. 
""" from __future__ import division, absolute_import, print_function from pgq.baseconsumer import BaseConsumer, BaseBatchWalker from pgq.event import Event __all__ = ['Consumer'] # Event status codes EV_UNTAGGED = -1 EV_RETRY = 0 EV_DONE = 1 class RetriableEvent(Event): """Event which can be retried Consumer is supposed to tag them after processing. """ __slots__ = ('_status', ) def __init__(self, queue_name, row): super(RetriableEvent, self).__init__(queue_name, row) self._status = EV_DONE def tag_done(self): self._status = EV_DONE def get_status(self): return self._status def tag_retry(self, retry_time=60): self._status = EV_RETRY self.retry_time = retry_time class RetriableWalkerEvent(RetriableEvent): """Redirects status flags to RetriableBatchWalker. That way event data can be gc'd immediately and tag_done() events don't need to be remembered. """ __slots__ = ('_walker', ) def __init__(self, walker, queue_name, row): super(RetriableWalkerEvent, self).__init__(queue_name, row) self._walker = walker def tag_done(self): self._walker.tag_event_done(self) def get_status(self): self._walker.get_status(self) def tag_retry(self, retry_time=60): self._walker.tag_event_retry(self, retry_time) class RetriableBatchWalker(BaseBatchWalker): """BatchWalker that returns RetriableEvents """ def __init__(self, curs, batch_id, queue_name, fetch_size=300, consumer_filter=None): super(RetriableBatchWalker, self).__init__(curs, batch_id, queue_name, fetch_size, consumer_filter) self.status_map = {} def _make_event(self, queue_name, row): return RetriableWalkerEvent(self, queue_name, row) def tag_event_done(self, event): if event.id in self.status_map: del self.status_map[event.id] def tag_event_retry(self, event, retry_time): self.status_map[event.id] = (EV_RETRY, retry_time) def get_status(self, event): return self.status_map.get(event.id, (EV_DONE, 0))[0] def iter_status(self): for res in self.status_map.items(): yield res class Consumer(BaseConsumer): """Normal consumer base 
class. Can retry events """ _batch_walker_class = RetriableBatchWalker def _make_event(self, queue_name, row): return RetriableEvent(queue_name, row) def _flush_retry(self, curs, batch_id, ev_list): """Tag retry events.""" retry = 0 if self.pgq_lazy_fetch: for ev_id, stat in ev_list.iter_status(): if stat[0] == EV_RETRY: self._tag_retry(curs, batch_id, ev_id, stat[1]) retry += 1 elif stat[0] != EV_DONE: raise Exception("Untagged event: id=%d" % ev_id) else: for ev in ev_list: if ev._status == EV_RETRY: self._tag_retry(curs, batch_id, ev.id, ev.retry_time) retry += 1 elif ev._status != EV_DONE: raise Exception("Untagged event: (id=%d, type=%s, data=%s, ex1=%s" % ( ev.id, ev.type, ev.data, ev.extra1)) # report weird events if retry: self.stat_increase('retry-events', retry) def _finish_batch(self, curs, batch_id, ev_list): """Tag events and notify that the batch is done.""" self._flush_retry(curs, batch_id, ev_list) super(Consumer, self)._finish_batch(curs, batch_id, ev_list) def _tag_retry(self, cx, batch_id, ev_id, retry_time): """Tag event for retry. (internal)""" cx.execute("select pgq.event_retry(%s, %s, %s)", [batch_id, ev_id, retry_time]) python-pgq/pgq/coopconsumer.py000066400000000000000000000050421302126165200170400ustar00rootroot00000000000000 """PgQ cooperative consumer for Python. """ from __future__ import division, absolute_import, print_function from pgq.consumer import Consumer __all__ = ['CoopConsumer'] class CoopConsumer(Consumer): """Cooperative Consumer base class. There will be one dbscript process per subconsumer. Config params:: ## pgq.CoopConsumer # name for subconsumer subconsumer_name = # pgsql interval when to consider parallel subconsumers dead, # and take over their unfinished batch #subconsumer_timeout = 1 hour """ def __init__(self, service_name, db_name, args): """Initialize new subconsumer. 
@param service_name: service_name for DBScript @param db_name: name of database for get_database() @param args: cmdline args for DBScript """ super(CoopConsumer, self).__init__(service_name, db_name, args) self.subconsumer_name = self.cf.get("subconsumer_name") self.subconsumer_timeout = self.cf.get("subconsumer_timeout", "") def register_consumer(self): """Registration for subconsumer.""" self.log.info("Registering consumer on source queue") db = self.get_database(self.db_name) cx = db.cursor() cx.execute("select pgq_coop.register_subconsumer(%s, %s, %s)", [self.queue_name, self.consumer_name, self.subconsumer_name]) res = cx.fetchone()[0] db.commit() return res def unregister_consumer(self): """Unregistration for subconsumer.""" self.log.info("Unregistering consumer from source queue") db = self.get_database(self.db_name) cx = db.cursor() cx.execute("select pgq_coop.unregister_subconsumer(%s, %s, %s, 0)", [self.queue_name, self.consumer_name, self.subconsumer_name]) db.commit() def _load_next_batch(self, curs): """Allocate next batch. (internal)""" if self.subconsumer_timeout: q = "select pgq_coop.next_batch(%s, %s, %s, %s)" curs.execute(q, [self.queue_name, self.consumer_name, self.subconsumer_name, self.subconsumer_timeout]) else: q = "select pgq_coop.next_batch(%s, %s, %s)" curs.execute(q, [self.queue_name, self.consumer_name, self.subconsumer_name]) return curs.fetchone()[0] def _finish_batch(self, curs, batch_id, ev_list): """Finish batch. (internal)""" self._flush_retry(curs, batch_id, ev_list) curs.execute("select pgq_coop.finish_batch(%s)", [batch_id]) python-pgq/pgq/event.py000066400000000000000000000041251302126165200154460ustar00rootroot00000000000000 """PgQ event container. 
""" from __future__ import division, absolute_import, print_function __all__ = ['Event'] _fldmap = { 'ev_id': 'ev_id', 'ev_txid': 'ev_txid', 'ev_time': 'ev_time', 'ev_type': 'ev_type', 'ev_data': 'ev_data', 'ev_extra1': 'ev_extra1', 'ev_extra2': 'ev_extra2', 'ev_extra3': 'ev_extra3', 'ev_extra4': 'ev_extra4', 'ev_retry': 'ev_retry', 'id': 'ev_id', 'txid': 'ev_txid', 'time': 'ev_time', 'type': 'ev_type', 'data': 'ev_data', 'extra1': 'ev_extra1', 'extra2': 'ev_extra2', 'extra3': 'ev_extra3', 'extra4': 'ev_extra4', 'retry': 'ev_retry', } class Event(object): """Event data for consumers. Will be removed from the queue by default. """ __slots__ = ('_event_row', 'retry_time', 'queue_name') def __init__(self, queue_name, row): self._event_row = row self.retry_time = 60 self.queue_name = queue_name def __getattr__(self, key): return self._event_row[_fldmap[key]] # would be better in RetriableEvent only since we don't care but # unfortunately it needs to be defined here due to compatibility concerns def tag_done(self): pass # be also dict-like def __getitem__(self, k): return self._event_row.__getitem__(k) def __contains__(self, k): return self._event_row.__contains__(k) def get(self, k, d=None): return self._event_row.get(k, d) def has_key(self, k): return self._event_row.has_key(k) def keys(self): return self._event_row.keys() def values(self): return self._event_row.keys() def items(self): return self._event_row.items() def iterkeys(self): return self._event_row.iterkeys() def itervalues(self): return self._event_row.itervalues() def __str__(self): return "" % ( self.id, self.type, self.data, self.extra1, self.extra2, self.extra3, self.extra4) python-pgq/pgq/localconsumer.py000066400000000000000000000161731302126165200172010ustar00rootroot00000000000000 """ Consumer that stores last applied position in local file. For cases where the consumer cannot use single database for remote tracking. 
To be subclassed, then override .process_local_batch() or .process_local_event() methods. """ from __future__ import division, absolute_import, print_function import sys import os import errno import skytools from pgq.baseconsumer import BaseConsumer __all__ = ['LocalConsumer'] class LocalConsumer(BaseConsumer): """Consumer that applies batches sequentially in second database. Requirements: - Whole batch in one TX. - Must not use retry queue. Features: - Can detect if several batches are already applied to dest db. - If some ticks are lost, allows to seek back on queue. Whether it succeeds, depends on pgq configuration. Config options:: ## Parameters for LocalConsumer ## # file location where last applied tick is tracked local_tracking_file = ~/state/%(job_name)s.tick """ def reload(self): super(LocalConsumer, self).reload() self.local_tracking_file = self.cf.getfile('local_tracking_file') if not os.path.exists(os.path.dirname(self.local_tracking_file)): raise skytools.UsageError("path does not exist: %s" % self.local_tracking_file) def init_optparse(self, parser=None): p = super(LocalConsumer, self).init_optparse(parser) p.add_option("--rewind", action="store_true", help="change queue position according to local tick") p.add_option("--reset", action="store_true", help="reset local tick based on queue position") return p def startup(self): if self.options.rewind: self.rewind() sys.exit(0) if self.options.reset: self.dst_reset() sys.exit(0) super(LocalConsumer, self).startup() self.check_queue() def check_queue(self): queue_tick = -1 local_tick = self.load_local_tick() db = self.get_database(self.db_name) curs = db.cursor() q = "select last_tick from pgq.get_consumer_info(%s, %s)" curs.execute(q, [self.queue_name, self.consumer_name]) rows = curs.fetchall() if len(rows) == 1: queue_tick = rows[0]['last_tick'] db.commit() if queue_tick < 0: if local_tick >= 0: self.log.info("Registering consumer at tick %d", local_tick) q = "select * from pgq.register_consumer_at(%s, 
%s, %s)" curs.execute(q, [self.queue_name, self.consumer_name, local_tick]) else: self.log.info("Registering consumer at queue top") q = "select * from pgq.register_consumer(%s, %s)" curs.execute(q, [self.queue_name, self.consumer_name]) elif local_tick < 0: self.log.info("Local tick missing, storing queue tick %d", queue_tick) self.save_local_tick(queue_tick) elif local_tick > queue_tick: self.log.warning("Tracking out of sync: queue=%d local=%d. Repositioning on queue. [Database failure?]", queue_tick, local_tick) q = "select * from pgq.register_consumer_at(%s, %s, %s)" curs.execute(q, [self.queue_name, self.consumer_name, local_tick]) elif local_tick < queue_tick: self.log.warning("Tracking out of sync: queue=%d local=%d. Rewinding queue. [Lost file data?]", queue_tick, local_tick) q = "select * from pgq.register_consumer_at(%s, %s, %s)" curs.execute(q, [self.queue_name, self.consumer_name, local_tick]) else: self.log.info("Ticks match: Queue=%d Local=%d", queue_tick, local_tick) def work(self): if self.work_state < 0: self.check_queue() return super(LocalConsumer, self).work() def process_batch(self, db, batch_id, event_list): """Process all events in batch. """ # check if done if self.is_batch_done(): return # actual work self.process_local_batch(db, batch_id, event_list) # finish work self.set_batch_done() def process_local_batch(self, db, batch_id, event_list): """Overridable method to process whole batch.""" for ev in event_list: self.process_local_event(db, batch_id, ev) def process_local_event(self, db, batch_id, ev): """Overridable method to process one event at a time.""" raise Exception('process_local_event not implemented') def is_batch_done(self): """Helper function to keep track of last successful batch in external database. """ local_tick = self.load_local_tick() cur_tick = self.batch_info['tick_id'] prev_tick = self.batch_info['prev_tick_id'] if local_tick < 0: # seems this consumer has not run yet? 
return False if prev_tick == local_tick: # on track return False if cur_tick == local_tick: # current batch is already applied, skip it return True # anything else means problems raise Exception('Lost position: batch %d..%d, dst has %d' % ( prev_tick, cur_tick, local_tick)) def set_batch_done(self): """Helper function to set last successful batch in external database. """ tick_id = self.batch_info['tick_id'] self.save_local_tick(tick_id) def register_consumer(self): new = super(LocalConsumer, self).register_consumer() if new: # fixme self.dst_reset() def unregister_consumer(self): """If unregistering, also clean completed tick table on dest.""" super(LocalConsumer, self).unregister_consumer() self.dst_reset() def rewind(self): dst_tick = self.load_local_tick() if dst_tick >= 0: src_db = self.get_database(self.db_name) src_curs = src_db.cursor() self.log.info("Rewinding queue to local tick %d", dst_tick) q = "select pgq.register_consumer_at(%s, %s, %s)" src_curs.execute(q, [self.queue_name, self.consumer_name, dst_tick]) src_db.commit() else: self.log.error('Cannot rewind, no tick found in local file') def dst_reset(self): self.log.info("Removing local tracking file") try: os.remove(self.local_tracking_file) except: pass def load_local_tick(self): """Reads stored tick or -1.""" try: f = open(self.local_tracking_file, 'r') buf = f.read() f.close() data = buf.strip() if data: tick_id = int(data) else: tick_id = -1 return tick_id except IOError as ex: if ex.errno == errno.ENOENT: return -1 raise def save_local_tick(self, tick_id): """Store tick in local file.""" data = str(tick_id) skytools.write_atomic(self.local_tracking_file, data) python-pgq/pgq/producer.py000066400000000000000000000022531302126165200161500ustar00rootroot00000000000000 """PgQ producer helpers for Python. 
""" from __future__ import division, absolute_import, print_function import skytools __all__ = ['bulk_insert_events', 'insert_event'] _fldmap = { 'id': 'ev_id', 'time': 'ev_time', 'type': 'ev_type', 'data': 'ev_data', 'extra1': 'ev_extra1', 'extra2': 'ev_extra2', 'extra3': 'ev_extra3', 'extra4': 'ev_extra4', 'ev_id': 'ev_id', 'ev_time': 'ev_time', 'ev_type': 'ev_type', 'ev_data': 'ev_data', 'ev_extra1': 'ev_extra1', 'ev_extra2': 'ev_extra2', 'ev_extra3': 'ev_extra3', 'ev_extra4': 'ev_extra4', } def bulk_insert_events(curs, rows, fields, queue_name): q = "select pgq.current_event_table(%s)" curs.execute(q, [queue_name]) tbl = curs.fetchone()[0] db_fields = map(_fldmap.get, fields) skytools.magic_insert(curs, tbl, rows, db_fields) def insert_event(curs, queue, ev_type, ev_data, extra1=None, extra2=None, extra3=None, extra4=None): q = "select pgq.insert_event(%s, %s, %s, %s, %s, %s, %s)" curs.execute(q, [queue, ev_type, ev_data, extra1, extra2, extra3, extra4]) return curs.fetchone()[0] python-pgq/pgq/remoteconsumer.py000066400000000000000000000135611302126165200174000ustar00rootroot00000000000000 """ old RemoteConsumer / SerialConsumer classes. """ from __future__ import division, absolute_import, print_function import sys from pgq.consumer import Consumer __all__ = ['RemoteConsumer', 'SerialConsumer'] class RemoteConsumer(Consumer): """Helper for doing event processing in another database. Requires that whole batch is processed in one TX. """ def __init__(self, service_name, db_name, remote_db, args): super(RemoteConsumer, self).__init__(service_name, db_name, args) self.remote_db = remote_db def process_batch(self, db, batch_id, event_list): """Process all events in batch. By default calls process_event for each. 
""" dst_db = self.get_database(self.remote_db) curs = dst_db.cursor() if self.is_last_batch(curs, batch_id): return self.process_remote_batch(db, batch_id, event_list, dst_db) self.set_last_batch(curs, batch_id) dst_db.commit() def is_last_batch(self, dst_curs, batch_id): """Helper function to keep track of last successful batch in external database. """ q = "select pgq_ext.is_batch_done(%s, %s)" dst_curs.execute(q, [self.consumer_name, batch_id]) return dst_curs.fetchone()[0] def set_last_batch(self, dst_curs, batch_id): """Helper function to set last successful batch in external database. """ q = "select pgq_ext.set_batch_done(%s, %s)" dst_curs.execute(q, [self.consumer_name, batch_id]) def process_remote_batch(self, db, batch_id, event_list, dst_db): raise Exception('process_remote_batch not implemented') class SerialConsumer(Consumer): """Consumer that applies batches sequentially in second database. Requirements: - Whole batch in one TX. - Must not use retry queue. Features: - Can detect if several batches are already applied to dest db. - If some ticks are lost. allows to seek back on queue. Whether it succeeds, depends on pgq configuration. """ def __init__(self, service_name, db_name, remote_db, args): super(SerialConsumer, self).__init__(service_name, db_name, args) self.remote_db = remote_db self.dst_schema = "pgq_ext" def startup(self): if self.options.rewind: self.rewind() sys.exit(0) if self.options.reset: self.dst_reset() sys.exit(0) return Consumer.startup(self) def init_optparse(self, parser=None): p = super(SerialConsumer, self).init_optparse(parser) p.add_option("--rewind", action="store_true", help="change queue position according to destination") p.add_option("--reset", action="store_true", help="reset queue pos on destination side") return p def process_batch(self, db, batch_id, event_list): """Process all events in batch. 
""" dst_db = self.get_database(self.remote_db) curs = dst_db.cursor() # check if done if self.is_batch_done(curs): return # actual work self.process_remote_batch(db, batch_id, event_list, dst_db) # finish work self.set_batch_done(curs) dst_db.commit() def is_batch_done(self, dst_curs): """Helper function to keep track of last successful batch in external database. """ cur_tick = self.batch_info['tick_id'] prev_tick = self.batch_info['prev_tick_id'] dst_tick = self.get_last_tick(dst_curs) if not dst_tick: # seems this consumer has not run yet against dst_db return False if prev_tick == dst_tick: # on track return False if cur_tick == dst_tick: # current batch is already applied, skip it return True # anything else means problems raise Exception('Lost position: batch %d..%d, dst has %d' % ( prev_tick, cur_tick, dst_tick)) def set_batch_done(self, dst_curs): """Helper function to set last successful batch in external database. """ tick_id = self.batch_info['tick_id'] self.set_last_tick(dst_curs, tick_id) def register_consumer(self): new = Consumer.register_consumer(self) if new: # fixme self.dst_reset() def unregister_consumer(self): """If unregistering, also clean completed tick table on dest.""" Consumer.unregister_consumer(self) self.dst_reset() def process_remote_batch(self, db, batch_id, event_list, dst_db): raise Exception('process_remote_batch not implemented') def rewind(self): self.log.info("Rewinding queue") src_db = self.get_database(self.db_name) dst_db = self.get_database(self.remote_db) src_curs = src_db.cursor() dst_curs = dst_db.cursor() dst_tick = self.get_last_tick(dst_curs) if dst_tick: q = "select pgq.register_consumer_at(%s, %s, %s)" src_curs.execute(q, [self.queue_name, self.consumer_name, dst_tick]) else: self.log.warning('No tick found on dst side') dst_db.commit() src_db.commit() def dst_reset(self): self.log.info("Resetting queue tracking on dst side") dst_db = self.get_database(self.remote_db) dst_curs = dst_db.cursor() 
self.set_last_tick(dst_curs, None) dst_db.commit() def get_last_tick(self, dst_curs): q = "select %s.get_last_tick(%%s)" % self.dst_schema dst_curs.execute(q, [self.consumer_name]) res = dst_curs.fetchone() return res[0] def set_last_tick(self, dst_curs, tick_id): q = "select %s.set_last_tick(%%s, %%s)" % self.dst_schema dst_curs.execute(q, [self.consumer_name, tick_id]) python-pgq/pgq/status.py000066400000000000000000000065351302126165200156570ustar00rootroot00000000000000 """Status display. """ from __future__ import division, absolute_import, print_function import sys import skytools __all__ = ['PGQStatus'] def ival(data, _as=None): "Format interval for output" if not _as: _as = data.split('.')[-1] numfmt = 'FM9999999' expr = "coalesce(to_char(extract(epoch from %s), '%s') || 's', 'NULL') as %s" return expr % (data, numfmt, _as) class PGQStatus(skytools.DBScript): """Info gathering and display.""" def __init__(self, args, check=0): super(PGQStatus, self).__init__('pgqadm', args) self.show_status() sys.exit(0) def show_status(self): db = self.get_database("db", autocommit=1) cx = db.cursor() cx.execute("show server_version") pgver = cx.fetchone()[0] cx.execute("select pgq.version()") qver = cx.fetchone()[0] print("Postgres version: %s PgQ version: %s" % (pgver, qver)) q = """select f.queue_name, f.queue_ntables, %s, %s, %s, %s, q.queue_ticker_max_count, f.ev_per_sec, f.ev_new from pgq.get_queue_info() f, pgq.queue q where q.queue_name = f.queue_name""" % ( ival('f.queue_rotation_period'), ival('f.ticker_lag'), ival('q.queue_ticker_max_lag'), ival('q.queue_ticker_idle_period'), ) cx.execute(q) event_rows = cx.fetchall() q = """select queue_name, consumer_name, %s, %s, pending_events from pgq.get_consumer_info()""" % ( ival('lag'), ival('last_seen'), ) cx.execute(q) consumer_rows = cx.fetchall() print("\n%-33s %9s %13s %6s %6s %5s" % ('Event queue', 'Rotation', 'Ticker', 'TLag', 'EPS', 'New')) print('-' * 78) for ev_row in event_rows: tck = "%s/%s/%s" % 
(ev_row['queue_ticker_max_count'], ev_row['queue_ticker_max_lag'], ev_row['queue_ticker_idle_period']) rot = "%s/%s" % (ev_row['queue_ntables'], ev_row['queue_rotation_period']) print("%-33s %9s %13s %6s %6.1f %5d" % ( ev_row['queue_name'], rot, tck, ev_row['ticker_lag'], ev_row['ev_per_sec'], ev_row['ev_new'], )) print('-' * 78) print("\n%-48s %9s %9s %8s" % ( 'Consumer', 'Lag', 'LastSeen', 'Pending')) print('-' * 78) for ev_row in event_rows: cons = self.pick_consumers(ev_row, consumer_rows) self.show_queue(ev_row, cons) print('-' * 78) db.commit() def show_consumer(self, cons): print(" %-46s %9s %9s %8d" % ( cons['consumer_name'], cons['lag'], cons['last_seen'], cons['pending_events'])) def show_queue(self, ev_row, consumer_rows): print("%(queue_name)s:" % ev_row) for cons in consumer_rows: self.show_consumer(cons) def pick_consumers(self, ev_row, consumer_rows): res = [] for con in consumer_rows: if con['queue_name'] != ev_row['queue_name']: continue res.append(con) return res python-pgq/setup.py000066400000000000000000000021261302126165200146750ustar00rootroot00000000000000"""Setup for pgq module. 
""" from setuptools import setup setup( name = "pgq", description = "PgQ consumer for Python", version = '3.3', license = "ISC", url = "https://github.com/pgq/python-pgq", maintainer = "Marko Kreen", maintainer_email = "markokr@gmail.com", packages = ['pgq', 'pgq.cascade'], install_requires = ['skytools', 'psycopg2'], classifiers = [ "Development Status :: 5 - Production/Stable", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: ISC License (ISCL)", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Database", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Utilities", ] ) python-pgq/tests/000077500000000000000000000000001302126165200143245ustar00rootroot00000000000000python-pgq/tests/test_pgq.py000066400000000000000000000002421302126165200165220ustar00rootroot00000000000000 import pgq from skytools import natsort_key from nose.tools import * def test_version(): assert_true(natsort_key(pgq.__version__) >= natsort_key('3.3')) python-pgq/tox.ini000066400000000000000000000015721302126165200145020ustar00rootroot00000000000000 [tox] envlist = lint2,lint3,py27,py35 [package] name = pgq deps = psycopg2 ../python-skytools [testenv] changedir = {envsitepackagesdir} deps = nose coverage {[package]deps} commands = coverage erase coverage run --rcfile "{toxinidir}/.coveragerc" --include "{[package]name}/*" \ -m nose -P --with-doctest --all-modules {[package]name} "{toxinidir}/tests" coverage html -d "{toxinidir}/tmp/cover-{envname}" \ --title "Coverage for {envname}" \ --rcfile "{toxinidir}/.coveragerc" coverage report --rcfile "{toxinidir}/.coveragerc" [testenv:lint2] basepython = python2.7 deps = prospector[with_everything] {[package]deps} 
commands = prospector --profile {toxinidir}/.prospector.yaml {[package]name} [testenv:lint3] basepython = python3.5 deps = prospector[with_everything] {[package]deps} commands = prospector --profile {toxinidir}/.prospector.yaml {[package]name}