confluent-kafka-0.11.0/0000755000076600000240000000000013135737034015265 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/confluent_kafka/0000755000076600000240000000000013135737034020417 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/confluent_kafka/__init__.py0000644000076600000240000000110313107245476022526 0ustar magnusstaff00000000000000__all__ = ['cimpl', 'avro', 'kafkatest'] from .cimpl import (Consumer, # noqa KafkaError, KafkaException, Message, Producer, TopicPartition, libversion, version, TIMESTAMP_NOT_AVAILABLE, TIMESTAMP_CREATE_TIME, TIMESTAMP_LOG_APPEND_TIME, OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID) confluent-kafka-0.11.0/confluent_kafka/avro/0000755000076600000240000000000013135737034021366 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/confluent_kafka/avro/__init__.py0000644000076600000240000001177513107245476023515 0ustar magnusstaff00000000000000""" Avro schema registry module: Deals with encoding and decoding of messages with avro schemas """ from confluent_kafka import Producer, Consumer from confluent_kafka.avro.error import ClientError from confluent_kafka.avro.load import load, loads # noqa from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient from confluent_kafka.avro.serializer import (SerializerError, # noqa KeySerializerError, ValueSerializerError) from confluent_kafka.avro.serializer.message_serializer import MessageSerializer class AvroProducer(Producer): """ Kafka Producer client which does avro schema encoding to messages. Handles schema registration, Message serialization. Constructor takes below parameters @:param: config: dict object with config parameters containing url for schema registry (schema.registry.url). @:param: default_key_schema: Optional avro schema for key @:param: default_value_schema: Optional avro schema for value """ def __init__(self, config, default_key_schema=None, default_value_schema=None, schema_registry=None): schema_registry_url = config.pop("schema.registry.url", None) if schema_registry is None: if schema_registry_url is None: raise ValueError("Missing parameter: schema.registry.url") schema_registry = CachedSchemaRegistryClient(url=schema_registry_url) elif schema_registry_url is not None: raise ValueError("Cannot pass schema_registry along with schema.registry.url config") super(AvroProducer, self).__init__(config) self._serializer = MessageSerializer(schema_registry) self._key_schema = default_key_schema self._value_schema = default_value_schema def produce(self, **kwargs): """ Sends message to kafka by encoding with specified avro schema @:param: topic: topic name @:param: value: An object to serialize @:param: value_schema : Avro schema for value @:param: key: An object to serialize @:param: key_schema : Avro schema for key @:exception: SerializerError """ # get schemas from kwargs if defined key_schema = kwargs.pop('key_schema', self._key_schema) value_schema = kwargs.pop('value_schema', self._value_schema) topic = kwargs.pop('topic', None) if not topic: raise ClientError("Topic name not specified.") value = kwargs.pop('value', None) key = kwargs.pop('key', None) if value: if value_schema: value = self._serializer.encode_record_with_schema(topic, value_schema, value) else: raise ValueSerializerError("Avro schema required for values") if key: if key_schema: key = self._serializer.encode_record_with_schema(topic, key_schema, key, True) else: raise KeySerializerError("Avro schema required for key") super(AvroProducer, self).produce(topic, value, 
key, **kwargs) class AvroConsumer(Consumer): """ Kafka Consumer client which does avro schema decoding of messages. Handles message deserialization. Constructor takes below parameters @:param: config: dict object with config parameters containing url for schema registry (schema.registry.url). """ def __init__(self, config, schema_registry=None): schema_registry_url = config.pop("schema.registry.url", None) if schema_registry is None: if schema_registry_url is None: raise ValueError("Missing parameter: schema.registry.url") schema_registry = CachedSchemaRegistryClient(url=schema_registry_url) elif schema_registry_url is not None: raise ValueError("Cannot pass schema_registry along with schema.registry.url config") super(AvroConsumer, self).__init__(config) self._serializer = MessageSerializer(schema_registry) def poll(self, timeout=None): """ This is an overriden method from confluent_kafka.Consumer class. This handles message deserialization using avro schema @:param timeout @:return message object with deserialized key and value as dict objects """ if timeout is None: timeout = -1 message = super(AvroConsumer, self).poll(timeout) if message is None: return None if not message.value() and not message.key(): return message if not message.error(): if message.value() is not None: decoded_value = self._serializer.decode_message(message.value()) message.set_value(decoded_value) if message.key() is not None: decoded_key = self._serializer.decode_message(message.key()) message.set_key(decoded_key) return message confluent-kafka-0.11.0/confluent_kafka/avro/cached_schema_registry_client.py0000644000076600000240000002616113107245476027766 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import json import logging from collections import defaultdict import requests from .error import ClientError from . import loads VALID_LEVELS = ['NONE', 'FULL', 'FORWARD', 'BACKWARD'] # Common accept header sent ACCEPT_HDR = "application/vnd.schemaregistry.v1+json, application/vnd.schemaregistry+json, application/json" log = logging.getLogger(__name__) class CachedSchemaRegistryClient(object): """ A client that talks to a Schema Registry over HTTP See http://confluent.io/docs/current/schema-registry/docs/intro.html Errors communicating to the server will result in a ClientError being raised. 
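A minimal usage sketch (the registry URL, subject name and schema below are
hypothetical and require a reachable Schema Registry):

    from confluent_kafka.avro import CachedSchemaRegistryClient, loads

    client = CachedSchemaRegistryClient(url='http://localhost:8081')
    value_schema = loads('{"type": "string"}')
    # Register the schema under a subject, then look it back up by id.
    schema_id = client.register('example-topic-value', value_schema)
    cached = client.get_by_id(schema_id)
    latest_id, latest_schema, version = client.get_latest_schema('example-topic-value')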
@:param: url: url to schema registry """ def __init__(self, url, max_schemas_per_subject=1000): """Construct a client by passing in the base URL of the schema registry server""" self.url = url.rstrip('/') self.max_schemas_per_subject = max_schemas_per_subject # subj => { schema => id } self.subject_to_schema_ids = defaultdict(dict) # id => avro_schema self.id_to_schema = defaultdict(dict) # subj => { schema => version } self.subject_to_schema_versions = defaultdict(dict) def _send_request(self, url, method='GET', body=None, headers=None): if body: body = json.dumps(body) body = body.encode('utf8') _headers = dict() _headers["Accept"] = ACCEPT_HDR if body: _headers["Content-Length"] = str(len(body)) _headers["Content-Type"] = "application/vnd.schemaregistry.v1+json" if headers: for header_name in headers: _headers[header_name] = headers[header_name] if method == 'GET': response = requests.get(url, headers=_headers) elif method == 'POST': response = requests.post(url, body, headers=_headers) elif method == 'PUT': response = requests.put(url, body, headers=_headers) elif method == 'DELETE': response = requests.delete(url, headers=_headers) else: raise ClientError("Invalid HTTP request type") result = json.loads(response.text) return (result, response.status_code) def _add_to_cache(self, cache, subject, schema, value): sub_cache = cache[subject] sub_cache[schema] = value def _cache_schema(self, schema, schema_id, subject=None, version=None): # don't overwrite anything if schema_id in self.id_to_schema: schema = self.id_to_schema[schema_id] else: self.id_to_schema[schema_id] = schema if subject: self._add_to_cache(self.subject_to_schema_ids, subject, schema, schema_id) if version: self._add_to_cache(self.subject_to_schema_versions, subject, schema, version) def register(self, subject, avro_schema): """ POST /subjects/(string: subject)/versions Register a schema with the registry under the given subject and receive a schema id. avro_schema must be a parsed schema from the python avro library Multiple instances of the same schema will result in cache misses. @:param: subject: subject name @:param: avro_schema: Avro schema to be registered @:returns: schema_id: int value """ schemas_to_id = self.subject_to_schema_ids[subject] schema_id = schemas_to_id.get(avro_schema, None) if schema_id is not None: return schema_id # send it up url = '/'.join([self.url, 'subjects', subject, 'versions']) # body is { schema : json_string } body = {'schema': json.dumps(avro_schema.to_json())} result, code = self._send_request(url, method='POST', body=body) if code == 409: raise ClientError("Incompatible Avro schema:" + str(code)) elif code == 422: raise ClientError("Invalid Avro schema:" + str(code)) elif not (code >= 200 and code <= 299): raise ClientError("Unable to register schema. 
Error code:" + str(code)) # result is a dict schema_id = result['id'] # cache it self._cache_schema(avro_schema, schema_id, subject) return schema_id def get_by_id(self, schema_id): """ GET /schemas/ids/{int: id} Retrieve a parsed avro schema by id or None if not found @:param: schema_id: int value @:returns: Avro schema """ if schema_id in self.id_to_schema: return self.id_to_schema[schema_id] # fetch from the registry url = '/'.join([self.url, 'schemas', 'ids', str(schema_id)]) result, code = self._send_request(url) if code == 404: log.error("Schema not found:" + str(code)) return None elif not (code >= 200 and code <= 299): log.error("Unable to get schema for the specific ID:" + str(code)) return None else: # need to parse the schema schema_str = result.get("schema") try: result = loads(schema_str) # cache it self._cache_schema(result, schema_id) return result except: # bad schema - should not happen raise ClientError("Received bad schema from registry.") def get_latest_schema(self, subject): """ GET /subjects/(string: subject)/versions/(versionId: version) Return the latest 3-tuple of: (the schema id, the parsed avro schema, the schema version) for a particular subject. This call always contacts the registry. If the subject is not found, (None,None,None) is returned. @:param: subject: subject name @:returns: (schema_id, schema, version) """ url = '/'.join([self.url, 'subjects', subject, 'versions', 'latest']) result, code = self._send_request(url) if code == 404: log.error("Schema not found:" + str(code)) return (None, None, None) elif code == 422: log.error("Invalid version:" + str(code)) return (None, None, None) elif not (code >= 200 and code <= 299): return (None, None, None) schema_id = result['id'] version = result['version'] if schema_id in self.id_to_schema: schema = self.id_to_schema[schema_id] else: try: schema = loads(result['schema']) except: # bad schema - should not happen raise ClientError("Received bad schema from registry.") self._cache_schema(schema, schema_id, subject, version) return (schema_id, schema, version) def get_version(self, subject, avro_schema): """ POST /subjects/(string: subject) Get the version of a schema for a given subject. Returns None if not found. @:param: subject: subject name @:param: avro_schema: Avro schema @:returns: version """ schemas_to_version = self.subject_to_schema_versions[subject] version = schemas_to_version.get(avro_schema, None) if version is not None: return version url = '/'.join([self.url, 'subjects', subject]) body = {'schema': json.dumps(avro_schema.to_json())} result, code = self._send_request(url, method='POST', body=body) if code == 404: log.error("Not found:" + str(code)) return None elif not (code >= 200 and code <= 299): log.error("Unable to get version of a schema:" + str(code)) return None schema_id = result['id'] version = result['version'] self._cache_schema(avro_schema, schema_id, subject, version) return version def test_compatibility(self, subject, avro_schema, version='latest'): """ POST /compatibility/subjects/(string: subject)/versions/(versionId: version) Test the compatibility of a candidate parsed schema for a given subject. By default the latest version is checked against. 
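A hedged example (the client instance, subject name and candidate schema are
illustrative only):

    from confluent_kafka.avro import loads

    candidate = loads('{"type": "record", "name": "User",'
                      ' "fields": [{"name": "name", "type": "string"}]}')
    # Only register the new schema if the registry reports it as compatible.
    if client.test_compatibility('example-topic-value', candidate):
        client.register('example-topic-value', candidate)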
@:param: subject: subject name @:param: avro_schema: Avro schema @:return: True if compatible, False if not compatible """ url = '/'.join([self.url, 'compatibility', 'subjects', subject, 'versions', str(version)]) body = {'schema': json.dumps(avro_schema.to_json())} try: result, code = self._send_request(url, method='POST', body=body) if code == 404: log.error(("Subject or version not found:" + str(code))) return False elif code == 422: log.error(("Invalid subject or schema:" + str(code))) return False elif code >= 200 and code <= 299: return result.get('is_compatible') else: log.error("Unable to check the compatibility") False except: return False def update_compatibility(self, level, subject=None): """ PUT /config/(string: subject) Update the compatibility level for a subject. Level must be one of: @:param: level: ex: 'NONE','FULL','FORWARD', or 'BACKWARD' """ if level not in VALID_LEVELS: raise ClientError("Invalid level specified: %s" % (str(level))) url = '/'.join([self.url, 'config']) if subject: url += '/' + subject body = {"compatibility": level} result, code = self._send_request(url, method='PUT', body=body) if code >= 200 and code <= 299: return result['compatibility'] else: raise ClientError("Unable to update level: %s. Error code: %d" % (str(level)), code) def get_compatibility(self, subject=None): """ GET /config Get the current compatibility level for a subject. Result will be one of: @:param: subject: subject name @:return: 'NONE','FULL','FORWARD', or 'BACKWARD' """ url = '/'.join([self.url, 'config']) if subject: url += '/' + subject result, code = self._send_request(url) if code >= 200 and code <= 299: compatibility = result.get('compatibility', None) if not compatibility: compatibility = result.get('compatibilityLevel') return compatibility confluent-kafka-0.11.0/confluent_kafka/avro/error.py0000644000076600000240000000175613107245476023105 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2017 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # class ClientError(Exception): """ Error thrown by Schema Registry clients """ def __init__(self, message, http_code=None): self.message = message self.http_code = http_code super(ClientError, self).__init__(self.__str__()) def __repr__(self): return "ClientError(error={error})".format(error=self.message) def __str__(self): return self.message confluent-kafka-0.11.0/confluent_kafka/avro/load.py0000644000076600000240000000237413107245476022670 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2017 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # import sys def loads(schema_str): """ Parse a schema given a schema string """ if sys.version_info[0] < 3: return schema.parse(schema_str) else: return schema.Parse(schema_str) def load(fp): """ Parse a schema from a file path """ with open(fp) as f: return loads(f.read()) # avro.schema.RecordSchema and avro.schema.PrimitiveSchema classes are not hashable. Hence defining them explicitly as # a quick fix def _hash_func(self): return hash(str(self)) try: from avro import schema schema.RecordSchema.__hash__ = _hash_func schema.PrimitiveSchema.__hash__ = _hash_func except ImportError: schema = None confluent-kafka-0.11.0/confluent_kafka/avro/serializer/0000755000076600000240000000000013135737034023537 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/confluent_kafka/avro/serializer/__init__.py0000644000076600000240000000211013042650774025644 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # class SerializerError(Exception): """Generic error from serializer package""" def __init__(self, message): self.message = message def __repr__(self): return '{klass}(error={error})'.format( klass=self.__class__.__name__, error=self.message ) def __str__(self): return self.message class KeySerializerError(SerializerError): pass class ValueSerializerError(SerializerError): pass confluent-kafka-0.11.0/confluent_kafka/avro/serializer/message_serializer.py0000644000076600000240000001553213107245476027777 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import io import logging import struct import sys import traceback import avro import avro.io from confluent_kafka.avro import ClientError from confluent_kafka.avro.serializer import (SerializerError, KeySerializerError, ValueSerializerError) log = logging.getLogger(__name__) MAGIC_BYTE = 0 HAS_FAST = False try: from fastavro.reader import read_data HAS_FAST = True except: pass class ContextStringIO(io.BytesIO): """ Wrapper to allow use of StringIO via 'with' constructs. """ def __enter__(self): return self def __exit__(self, *args): self.close() return False class MessageSerializer(object): """ A helper class that can serialize and deserialize messages that need to be encoded or decoded using the schema registry. 
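Encoded messages are framed as a single magic byte (0), the 4-byte schema id
in network byte order, and then the Avro binary payload.  A minimal round-trip
sketch, assuming registry_client is a CachedSchemaRegistryClient and
value_schema is a parsed record schema with a string field 'name':

    serializer = MessageSerializer(registry_client)
    buf = serializer.encode_record_with_schema('example-topic', value_schema,
                                               {'name': 'alice'})
    record = serializer.decode_message(buf)   # -> {'name': 'alice'}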
All encode_* methods return a buffer that can be sent to kafka. All decode_* methods expect a buffer received from kafka. """ def __init__(self, registry_client): self.registry_client = registry_client self.id_to_decoder_func = {} self.id_to_writers = {} ''' ''' def encode_record_with_schema(self, topic, schema, record, is_key=False): """ Given a parsed avro schema, encode a record for the given topic. The record is expected to be a dictionary. The schema is registered with the subject of 'topic-value' @:param topic : Topic name @:param schema : Avro Schema @:param record : An object to serialize @:param is_key : If the record is a key @:returns : Encoded record with schema ID as bytes """ serialize_err = KeySerializerError if is_key else ValueSerializerError subject_suffix = ('-key' if is_key else '-value') # get the latest schema for the subject subject = topic + subject_suffix # register it schema_id = self.registry_client.register(subject, schema) if not schema_id: message = "Unable to retrieve schema id for subject %s" % (subject) raise serialize_err(message) # cache writer self.id_to_writers[schema_id] = avro.io.DatumWriter(schema) return self.encode_record_with_schema_id(schema_id, record, is_key=is_key) def encode_record_with_schema_id(self, schema_id, record, is_key=False): """ Encode a record with a given schema id. The record must be a python dictionary. @:param: schema_id : integer ID @:param: record : An object to serialize @:param is_key : If the record is a key @:returns: decoder function """ serialize_err = KeySerializerError if is_key else ValueSerializerError # use slow avro if schema_id not in self.id_to_writers: # get the writer + schema try: schema = self.registry_client.get_by_id(schema_id) if not schema: raise serialize_err("Schema does not exist") self.id_to_writers[schema_id] = avro.io.DatumWriter(schema) except ClientError: exc_type, exc_value, exc_traceback = sys.exc_info() raise serialize_err(repr(traceback.format_exception(exc_type, exc_value, exc_traceback))) # get the writer writer = self.id_to_writers[schema_id] with ContextStringIO() as outf: # write the header # magic byte outf.write(struct.pack('b', MAGIC_BYTE)) # write the schema ID in network byte order (big end) outf.write(struct.pack('>I', schema_id)) # write the record to the rest of it # Create an encoder that we'll write to encoder = avro.io.BinaryEncoder(outf) # write the magic byte # write the object in 'obj' as Avro to the fake file... writer.write(record, encoder) return outf.getvalue() # Decoder support def _get_decoder_func(self, schema_id, payload): if schema_id in self.id_to_decoder_func: return self.id_to_decoder_func[schema_id] # fetch from schema reg try: schema = self.registry_client.get_by_id(schema_id) except: schema = None if not schema: err = "unable to fetch schema with id %d" % (schema_id) raise SerializerError(err) curr_pos = payload.tell() if HAS_FAST: # try to use fast avro try: schema_dict = schema.to_json() read_data(payload, schema_dict) # If we reach this point, this means we have fastavro and it can # do this deserialization. Rewind since this method just determines # the reader function and we need to deserialize again along the # normal path. 
payload.seek(curr_pos) self.id_to_decoder_func[schema_id] = lambda p: read_data(p, schema_dict) return self.id_to_decoder_func[schema_id] except: pass # here means we should just delegate to slow avro # rewind payload.seek(curr_pos) avro_reader = avro.io.DatumReader(schema) def decoder(p): bin_decoder = avro.io.BinaryDecoder(p) return avro_reader.read(bin_decoder) self.id_to_decoder_func[schema_id] = decoder return self.id_to_decoder_func[schema_id] def decode_message(self, message): """ Decode a message from kafka that has been encoded for use with the schema registry. @:param: message """ if message is None: return None if len(message) <= 5: raise SerializerError("message is too small to decode") with ContextStringIO(message) as payload: magic, schema_id = struct.unpack('>bI', payload.read(5)) if magic != MAGIC_BYTE: raise SerializerError("message does not start with magic byte") decoder_func = self._get_decoder_func(schema_id, payload) return decoder_func(payload) confluent-kafka-0.11.0/confluent_kafka/kafkatest/0000755000076600000240000000000013135737034022374 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/confluent_kafka/kafkatest/__init__.py0000644000076600000240000000012513042650774024505 0ustar magnusstaff00000000000000""" Python client implementations of the official Kafka tests/kafkatest clients. """ confluent-kafka-0.11.0/confluent_kafka/kafkatest/verifiable_client.py0000644000076600000240000000554413107245476026427 0ustar magnusstaff00000000000000# Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import datetime import json import os import re import signal import socket import sys import time class VerifiableClient(object): """ Generic base class for a kafkatest verifiable client. Implements the common kafkatest protocol and semantics. """ def __init__(self, conf): """ """ super(VerifiableClient, self).__init__() self.conf = conf self.conf['client.id'] = 'python@' + socket.gethostname() self.run = True signal.signal(signal.SIGTERM, self.sig_term) self.dbg('Pid is %d' % os.getpid()) def sig_term(self, sig, frame): self.dbg('SIGTERM') self.run = False @staticmethod def _timestamp(): return time.strftime('%H:%M:%S', time.localtime()) def dbg(self, s): """ Debugging printout """ sys.stderr.write('%% %s DEBUG: %s\n' % (self._timestamp(), s)) def err(self, s, term=False): """ Error printout, if term=True the process will terminate immediately. """ sys.stderr.write('%% %s ERROR: %s\n' % (self._timestamp(), s)) if term: sys.stderr.write('%% FATAL ERROR ^\n') sys.exit(1) def send(self, d): """ Send dict as JSON to stdout for consumtion by kafkatest handler """ d['_time'] = str(datetime.datetime.now()) self.dbg('SEND: %s' % json.dumps(d)) sys.stdout.write('%s\n' % json.dumps(d)) sys.stdout.flush() @staticmethod def set_config(conf, args): """ Set client config properties using args dict. """ for n, v in args.iteritems(): if v is None: continue # Things to ignore if '.' 
not in n: # App config, skip continue if n.startswith('topic.'): # Set "topic.<...>" properties on default topic conf dict conf['default.topic.config'][n[6:]] = v elif n == 'partition.assignment.strategy': # Convert Java class name to config value. # "org.apache.kafka.clients.consumer.RangeAssignor" -> "range" conf[n] = re.sub(r'org.apache.kafka.clients.consumer.(\w+)Assignor', lambda x: x.group(1).lower(), v) else: conf[n] = v confluent-kafka-0.11.0/confluent_kafka/kafkatest/verifiable_consumer.py0000755000076600000240000002356213107245476027007 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import argparse from confluent_kafka import Consumer, KafkaError, KafkaException from verifiable_client import VerifiableClient class VerifiableConsumer(VerifiableClient): """ confluent-kafka-python backed VerifiableConsumer class for use with Kafka's kafkatests client tests. """ def __init__(self, conf): """ \p conf is a config dict passed to confluent_kafka.Consumer() """ super(VerifiableConsumer, self).__init__(conf) self.conf['on_commit'] = self.on_commit self.consumer = Consumer(**conf) self.consumed_msgs = 0 self.consumed_msgs_last_reported = 0 self.consumed_msgs_at_last_commit = 0 self.use_auto_commit = False self.use_async_commit = False self.max_msgs = -1 self.assignment = [] self.assignment_dict = dict() def find_assignment(self, topic, partition): """ Find and return existing assignment based on \p topic and \p partition, or None on miss. """ skey = '%s %d' % (topic, partition) return self.assignment_dict.get(skey) def send_records_consumed(self, immediate=False): """ Send records_consumed, every 100 messages, on timeout, or if immediate is set. """ if self.consumed_msgs <= self.consumed_msgs_last_reported + (0 if immediate else 100): return if len(self.assignment) == 0: return d = {'name': 'records_consumed', 'count': self.consumed_msgs - self.consumed_msgs_last_reported, 'partitions': []} for a in self.assignment: if a.min_offset == -1: # Skip partitions that havent had any messages since last time. # This is to circumvent some minOffset checks in kafkatest. continue d['partitions'].append(a.to_dict()) a.min_offset = -1 self.send(d) self.consumed_msgs_last_reported = self.consumed_msgs def send_assignment(self, evtype, partitions): """ Send assignment update, \p evtype is either 'assigned' or 'revoked' """ d = {'name': 'partitions_' + evtype, 'partitions': [{'topic': x.topic, 'partition': x.partition} for x in partitions]} self.send(d) def on_assign(self, consumer, partitions): """ Rebalance on_assign callback """ old_assignment = self.assignment self.assignment = [AssignedPartition(p.topic, p.partition) for p in partitions] # Move over our last seen offsets so that we can report a proper # minOffset even after a rebalance loop. 
for a in old_assignment: b = self.find_assignment(a.topic, a.partition) b.min_offset = a.min_offset self.assignment_dict = {a.skey: a for a in self.assignment} self.send_assignment('assigned', partitions) def on_revoke(self, consumer, partitions): """ Rebalance on_revoke callback """ # Send final consumed records prior to rebalancing to make sure # latest consumed is in par with what is going to be committed. self.send_records_consumed(immediate=True) self.assignment = list() self.assignment_dict = dict() self.send_assignment('revoked', partitions) self.do_commit(immediate=True) def on_commit(self, err, partitions): """ Offsets Committed callback """ if err is not None and err.code() == KafkaError._NO_OFFSET: self.dbg('on_commit(): no offsets to commit') return # Report consumed messages to make sure consumed position >= committed position self.send_records_consumed(immediate=True) d = {'name': 'offsets_committed', 'offsets': []} if err is not None: d['success'] = False d['error'] = str(err) else: d['success'] = True d['error'] = '' for p in partitions: pd = {'topic': p.topic, 'partition': p.partition, 'offset': p.offset, 'error': str(p.error)} d['offsets'].append(pd) self.send(d) def do_commit(self, immediate=False, async=None): """ Commit every 1000 messages or whenever there is a consume timeout or immediate. """ if (self.use_auto_commit or self.consumed_msgs_at_last_commit + (0 if immediate else 1000) > self.consumed_msgs): return # Make sure we report consumption before commit, # otherwise tests may fail because of commit > consumed if self.consumed_msgs_at_last_commit < self.consumed_msgs: self.send_records_consumed(immediate=True) if async is None: async_mode = self.use_async_commit else: async_mode = async self.dbg('Committing %d messages (Async=%s)' % (self.consumed_msgs - self.consumed_msgs_at_last_commit, async_mode)) try: self.consumer.commit(async=async_mode) except KafkaException as e: if e.args[0].code() == KafkaError._WAIT_COORD: self.dbg('Ignoring commit failure, still waiting for coordinator') elif e.args[0].code() == KafkaError._NO_OFFSET: self.dbg('No offsets to commit') else: raise self.consumed_msgs_at_last_commit = self.consumed_msgs def msg_consume(self, msg): """ Handle consumed message (or error event) """ if msg.error(): if msg.error().code() == KafkaError._PARTITION_EOF: # ignore EOF pass else: self.err('Consume failed: %s' % msg.error(), term=True) return if False: self.dbg('Read msg from %s [%d] @ %d' % (msg.topic(), msg.partition(), msg.offset())) if self.max_msgs >= 0 and self.consumed_msgs >= self.max_msgs: return # ignore extra messages # Find assignment. a = self.find_assignment(msg.topic(), msg.partition()) if a is None: self.err('Received message on unassigned partition %s [%d] @ %d' % (msg.topic(), msg.partition(), msg.offset()), term=True) a.consumed_msgs += 1 if a.min_offset == -1: a.min_offset = msg.offset() if a.max_offset < msg.offset(): a.max_offset = msg.offset() self.consumed_msgs += 1 self.send_records_consumed(immediate=False) self.do_commit(immediate=False) class AssignedPartition(object): """ Local state container for assigned partition. 
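The to_dict() helper reports the offset range seen on this partition in the
kafkatest JSON format; the values below are illustrative only:

    {'topic': 'example-topic', 'partition': 0, 'minOffset': 12, 'maxOffset': 42}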
""" def __init__(self, topic, partition): super(AssignedPartition, self).__init__() self.topic = topic self.partition = partition self.skey = '%s %d' % (self.topic, self.partition) self.consumed_msgs = 0 self.min_offset = -1 self.max_offset = 0 def to_dict(self): """ Return a dict of this partition's state """ return {'topic': self.topic, 'partition': self.partition, 'minOffset': self.min_offset, 'maxOffset': self.max_offset} if __name__ == '__main__': parser = argparse.ArgumentParser(description='Verifiable Python Consumer') parser.add_argument('--topic', action='append', type=str, required=True) parser.add_argument('--group-id', dest='group.id', required=True) parser.add_argument('--broker-list', dest='bootstrap.servers', required=True) parser.add_argument('--session-timeout', type=int, dest='session.timeout.ms', default=6000) parser.add_argument('--enable-autocommit', action='store_true', dest='enable.auto.commit', default=False) parser.add_argument('--max-messages', type=int, dest='max_messages', default=-1) parser.add_argument('--assignment-strategy', dest='partition.assignment.strategy') parser.add_argument('--reset-policy', dest='topic.auto.offset.reset', default='earliest') parser.add_argument('--consumer.config', dest='consumer_config') args = vars(parser.parse_args()) conf = {'broker.version.fallback': '0.9.0', 'default.topic.config': dict()} VerifiableClient.set_config(conf, args) vc = VerifiableConsumer(conf) vc.use_auto_commit = args['enable.auto.commit'] vc.max_msgs = args['max_messages'] vc.dbg('Using config: %s' % conf) vc.dbg('Subscribing to %s' % args['topic']) vc.consumer.subscribe(args['topic'], on_assign=vc.on_assign, on_revoke=vc.on_revoke) try: while vc.run: msg = vc.consumer.poll(timeout=1.0) if msg is None: # Timeout. # Try reporting consumed messages vc.send_records_consumed(immediate=True) # Commit every poll() timeout instead of on every message. # Also commit on every 1000 messages, whichever comes first. vc.do_commit(immediate=True) continue # Handle message (or error event) vc.msg_consume(msg) except KeyboardInterrupt: pass vc.dbg('Closing consumer') vc.send_records_consumed(immediate=True) if not vc.use_auto_commit: vc.do_commit(immediate=True, async=False) vc.consumer.close() vc.send({'name': 'shutdown_complete'}) vc.dbg('All done') confluent-kafka-0.11.0/confluent_kafka/kafkatest/verifiable_producer.py0000755000076600000240000001106113107245476026766 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import argparse import time from confluent_kafka import Producer, KafkaException from verifiable_client import VerifiableClient class VerifiableProducer(VerifiableClient): """ confluent-kafka-python backed VerifiableProducer class for use with Kafka's kafkatests client tests. 
""" def __init__(self, conf): """ \p conf is a config dict passed to confluent_kafka.Producer() """ super(VerifiableProducer, self).__init__(conf) self.conf['on_delivery'] = self.dr_cb self.conf['default.topic.config']['produce.offset.report'] = True self.producer = Producer(**self.conf) self.num_acked = 0 self.num_sent = 0 self.num_err = 0 def dr_cb(self, err, msg): """ Per-message Delivery report callback. Called from poll() """ if err: self.num_err += 1 self.send({'name': 'producer_send_error', 'message': str(err), 'topic': msg.topic(), 'key': msg.key(), 'value': msg.value()}) else: self.num_acked += 1 self.send({'name': 'producer_send_success', 'topic': msg.topic(), 'partition': msg.partition(), 'offset': msg.offset(), 'key': msg.key(), 'value': msg.value()}) pass if __name__ == '__main__': parser = argparse.ArgumentParser(description='Verifiable Python Producer') parser.add_argument('--topic', type=str, required=True) parser.add_argument('--throughput', type=int, default=0) parser.add_argument('--broker-list', dest='bootstrap.servers', required=True) parser.add_argument('--max-messages', type=int, dest='max_msgs', default=1000000) # avoid infinite parser.add_argument('--value-prefix', dest='value_prefix', type=str, default=None) parser.add_argument('--acks', type=int, dest='topic.request.required.acks', default=-1) parser.add_argument('--producer.config', dest='producer_config') args = vars(parser.parse_args()) conf = {'broker.version.fallback': '0.9.0', 'default.topic.config': dict()} VerifiableClient.set_config(conf, args) vp = VerifiableProducer(conf) vp.max_msgs = args['max_msgs'] throughput = args['throughput'] topic = args['topic'] if args['value_prefix'] is not None: value_fmt = args['value_prefix'] + '.%d' else: value_fmt = '%d' if throughput > 0: delay = 1.0/throughput else: delay = 0 vp.dbg('Producing %d messages at a rate of %d/s' % (vp.max_msgs, throughput)) try: for i in range(0, vp.max_msgs): if not vp.run: break t_end = time.time() + delay while vp.run: try: vp.producer.produce(topic, value=(value_fmt % i)) vp.num_sent += 1 except KafkaException as e: vp.err('produce() #%d/%d failed: %s' % (i, vp.max_msgs, str(e))) vp.num_err += 1 except BufferError: vp.dbg('Local produce queue full (produced %d/%d msgs), waiting for deliveries..' % (i, vp.max_msgs)) vp.producer.poll(timeout=0.5) continue break # Delay to achieve desired throughput, # but make sure poll is called at least once # to serve DRs. while True: remaining = max(0, t_end - time.time()) vp.producer.poll(timeout=remaining) if remaining <= 0.00000001: break except KeyboardInterrupt: pass # Flush remaining messages to broker. vp.dbg('Flushing') try: vp.producer.flush() except KeyboardInterrupt: pass vp.send({'name': 'shutdown_complete'}) vp.dbg('All done') confluent-kafka-0.11.0/confluent_kafka/src/0000755000076600000240000000000013135737034021206 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/confluent_kafka/src/confluent_kafka.c0000644000076600000240000012122313122216424024474 0ustar magnusstaff00000000000000/** * Copyright 2016 Confluent Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include "confluent_kafka.h" #include /** * @brief KNOWN ISSUES * * - Partitioners will cause a dead-lock with librdkafka, because: * GIL + topic lock in topic_new is different lock order than * topic lock in msg_partitioner + GIL. * This needs to be sorted out in librdkafka, preferably making the * partitioner run without any locks taken. * Until this is fixed the partitioner is ignored and librdkafka's * default will be used. * - KafkaError type .tp_doc allocation is lost on exit. * */ PyObject *KafkaException; /**************************************************************************** * * * KafkaError * * * FIXME: Pre-create simple instances for each error code, only instantiate * a new object if a rich error string is provided. * ****************************************************************************/ typedef struct { PyObject_HEAD rd_kafka_resp_err_t code; /* Error code */ char *str; /* Human readable representation of error, if one * was provided by librdkafka. * Else falls back on err2str(). */ } KafkaError; static PyObject *KafkaError_code (KafkaError *self, PyObject *ignore) { return PyLong_FromLong(self->code); } static PyObject *KafkaError_str (KafkaError *self, PyObject *ignore) { if (self->str) return cfl_PyUnistr(_FromString(self->str)); else return cfl_PyUnistr(_FromString(rd_kafka_err2str(self->code))); } static PyObject *KafkaError_name (KafkaError *self, PyObject *ignore) { /* FIXME: Pre-create name objects */ return cfl_PyUnistr(_FromString(rd_kafka_err2name(self->code))); } static PyMethodDef KafkaError_methods[] = { { "code", (PyCFunction)KafkaError_code, METH_NOARGS, " Returns the error/event code for comparison to" "KafkaError..\n" "\n" " :returns: error/event code\n" " :rtype: int\n" "\n" }, { "str", (PyCFunction)KafkaError_str, METH_NOARGS, " Returns the human-readable error/event string.\n" "\n" " :returns: error/event message string\n" " :rtype: str\n" "\n" }, { "name", (PyCFunction)KafkaError_name, METH_NOARGS, " Returns the enum name for error/event.\n" "\n" " :returns: error/event enum name string\n" " :rtype: str\n" "\n" }, { NULL } }; static void KafkaError_dealloc (KafkaError *self) { if (self->str) free(self->str); Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject *KafkaError_str0 (KafkaError *self) { return cfl_PyUnistr(_FromFormat("KafkaError{code=%s,val=%d,str=\"%s\"}", rd_kafka_err2name(self->code), self->code, self->str ? self->str : rd_kafka_err2str(self->code))); } static long KafkaError_hash (KafkaError *self) { return self->code; } static PyTypeObject KafkaErrorType; static PyObject* KafkaError_richcompare (KafkaError *self, PyObject *o2, int op) { int code2; int r; PyObject *result; if (Py_TYPE(o2) == &KafkaErrorType) code2 = ((KafkaError *)o2)->code; else code2 = (int)PyLong_AsLong(o2); switch (op) { case Py_LT: r = self->code < code2; break; case Py_LE: r = self->code <= code2; break; case Py_EQ: r = self->code == code2; break; case Py_NE: r = self->code != code2; break; case Py_GT: r = self->code > code2; break; case Py_GE: r = self->code >= code2; break; default: r = 0; break; } result = r ? 
Py_True : Py_False; Py_INCREF(result); return result; } static PyTypeObject KafkaErrorType = { PyVarObject_HEAD_INIT(NULL, 0) "cimpl.KafkaError", /*tp_name*/ sizeof(KafkaError), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)KafkaError_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ (reprfunc)KafkaError_str0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ (hashfunc)KafkaError_hash, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ PyObject_GenericGetAttr, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ "Kafka error and event object\n" "\n" " The KafkaError class serves multiple purposes:\n" "\n" " - Propagation of errors\n" " - Propagation of events\n" " - Exceptions\n" "\n" " This class is not user-instantiable.\n" "\n", /*tp_doc*/ 0, /* tp_traverse */ 0, /* tp_clear */ (richcmpfunc)KafkaError_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ KafkaError_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0 /* tp_alloc */ }; /** * @brief Initialize a KafkaError object. */ static void KafkaError_init (KafkaError *self, rd_kafka_resp_err_t code, const char *str) { self->code = code; if (str) self->str = strdup(str); else self->str = NULL; } /** * @brief Internal factory to create KafkaError object. */ PyObject *KafkaError_new0 (rd_kafka_resp_err_t err, const char *fmt, ...) { KafkaError *self; va_list ap; char buf[512]; self = (KafkaError *)KafkaErrorType. tp_alloc(&KafkaErrorType, 0); if (!self) return NULL; if (fmt) { va_start(ap, fmt); vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); } KafkaError_init(self, err, fmt ? buf : rd_kafka_err2str(err)); return (PyObject *)self; } /** * @brief Internal factory to create KafkaError object. * @returns a new KafkaError object if \p err != 0, else a None object. 
*/ PyObject *KafkaError_new_or_None (rd_kafka_resp_err_t err, const char *str) { if (!err) Py_RETURN_NONE; if (str) return KafkaError_new0(err, "%s", str); else return KafkaError_new0(err, NULL); } /**************************************************************************** * * * Message * * * * ****************************************************************************/ PyObject *Message_error (Message *self, PyObject *ignore) { if (self->error) { Py_INCREF(self->error); return self->error; } else Py_RETURN_NONE; } static PyObject *Message_value (Message *self, PyObject *ignore) { if (self->value) { Py_INCREF(self->value); return self->value; } else Py_RETURN_NONE; } static PyObject *Message_key (Message *self, PyObject *ignore) { if (self->key) { Py_INCREF(self->key); return self->key; } else Py_RETURN_NONE; } static PyObject *Message_topic (Message *self, PyObject *ignore) { if (self->topic) { Py_INCREF(self->topic); return self->topic; } else Py_RETURN_NONE; } static PyObject *Message_partition (Message *self, PyObject *ignore) { if (self->partition != RD_KAFKA_PARTITION_UA) return PyLong_FromLong(self->partition); else Py_RETURN_NONE; } static PyObject *Message_offset (Message *self, PyObject *ignore) { if (self->offset >= 0) return PyLong_FromLongLong(self->offset); else Py_RETURN_NONE; } static PyObject *Message_timestamp (Message *self, PyObject *ignore) { return Py_BuildValue("iL", self->tstype, self->timestamp); } static PyObject *Message_set_value (Message *self, PyObject *new_val) { if (self->value) Py_DECREF(self->value); self->value = new_val; Py_INCREF(self->value); Py_RETURN_NONE; } static PyObject *Message_set_key (Message *self, PyObject *new_key) { if (self->key) Py_DECREF(self->key); self->key = new_key; Py_INCREF(self->key); Py_RETURN_NONE; } static PyMethodDef Message_methods[] = { { "error", (PyCFunction)Message_error, METH_NOARGS, " The message object is also used to propagate errors and events, " "an application must check error() to determine if the Message " "is a proper message (error() returns None) or an error or event " "(error() returns a KafkaError object)\n" "\n" " :rtype: None or :py:class:`KafkaError`\n" "\n" }, { "value", (PyCFunction)Message_value, METH_NOARGS, " :returns: message value (payload) or None if not available.\n" " :rtype: str|bytes or None\n" "\n" }, { "key", (PyCFunction)Message_key, METH_NOARGS, " :returns: message key or None if not available.\n" " :rtype: str|bytes or None\n" "\n" }, { "topic", (PyCFunction)Message_topic, METH_NOARGS, " :returns: topic name or None if not available.\n" " :rtype: str or None\n" "\n" }, { "partition", (PyCFunction)Message_partition, METH_NOARGS, " :returns: partition number or None if not available.\n" " :rtype: int or None\n" "\n" }, { "offset", (PyCFunction)Message_offset, METH_NOARGS, " :returns: message offset or None if not available.\n" " :rtype: int or None\n" "\n" }, { "timestamp", (PyCFunction)Message_timestamp, METH_NOARGS, " Retrieve timestamp type and timestamp from message.\n" " The timestamp type is one of:\n" " * :py:const:`TIMESTAMP_NOT_AVAILABLE`" " - Timestamps not supported by broker\n" " * :py:const:`TIMESTAMP_CREATE_TIME` " " - Message creation time (or source / producer time)\n" " * :py:const:`TIMESTAMP_LOG_APPEND_TIME` " " - Broker receive time\n" "\n" " The returned timestamp should be ignored if the timestamp type is " ":py:const:`TIMESTAMP_NOT_AVAILABLE`.\n" "\n" " The timestamp is the number of milliseconds since the epoch (UTC).\n" "\n" " Timestamps require broker version 
0.10.0.0 or later and \n" " ``{'api.version.request': True}`` configured on the client.\n" "\n" " :returns: tuple of message timestamp type, and timestamp.\n" " :rtype: (int, int)\n" "\n" }, { "set_value", (PyCFunction)Message_set_value, METH_O, " Set the field 'Message.value' with new value.\n" " :param: object value: Message.value.\n" " :returns: None.\n" " :rtype: None\n" "\n" }, { "set_key", (PyCFunction)Message_set_key, METH_O, " Set the field 'Message.key' with new value.\n" " :param: object value: Message.key.\n" " :returns: None.\n" " :rtype: None\n" "\n" }, { NULL } }; static int Message_clear (Message *self) { if (self->topic) { Py_DECREF(self->topic); self->topic = NULL; } if (self->value) { Py_DECREF(self->value); self->value = NULL; } if (self->key) { Py_DECREF(self->key); self->key = NULL; } if (self->error) { Py_DECREF(self->error); self->error = NULL; } return 0; } static void Message_dealloc (Message *self) { Message_clear(self); PyObject_GC_UnTrack(self); Py_TYPE(self)->tp_free((PyObject *)self); } static int Message_traverse (Message *self, visitproc visit, void *arg) { if (self->topic) Py_VISIT(self->topic); if (self->value) Py_VISIT(self->value); if (self->key) Py_VISIT(self->key); if (self->error) Py_VISIT(self->error); return 0; } static Py_ssize_t Message__len__ (Message *self) { return self->value ? PyObject_Length(self->value) : 0; } static PySequenceMethods Message_seq_methods = { (lenfunc)Message__len__ /* sq_length */ }; PyTypeObject MessageType = { PyVarObject_HEAD_INIT(NULL, 0) "cimpl.Message", /*tp_name*/ sizeof(Message), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Message_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ &Message_seq_methods, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ "The Message object represents either a single consumed or " "produced message, or an event (:py:func:`error()` is not None).\n" "\n" "An application must check with :py:func:`error()` to see if the " "object is a proper message (error() returns None) or an " "error/event.\n" "\n" "This class is not user-instantiable.\n" "\n" ".. py:function:: len()\n" "\n" " :returns: Message value (payload) size in bytes\n" " :rtype: int\n" "\n", /*tp_doc*/ (traverseproc)Message_traverse, /* tp_traverse */ (inquiry)Message_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ Message_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0 /* tp_alloc */ }; /** * @brief Internal factory to create Message object from message_t */ PyObject *Message_new0 (const Handle *handle, const rd_kafka_message_t *rkm) { Message *self; self = (Message *)MessageType.tp_alloc(&MessageType, 0); if (!self) return NULL; /* Only use message error string on Consumer, for Producers * it will contain the original message payload. */ self->error = KafkaError_new_or_None( rkm->err, (rkm->err && handle->type != RD_KAFKA_PRODUCER) ? 
rd_kafka_message_errstr(rkm) : NULL); if (rkm->rkt) self->topic = cfl_PyUnistr( _FromString(rd_kafka_topic_name(rkm->rkt))); if (rkm->payload) self->value = cfl_PyBin(_FromStringAndSize(rkm->payload, rkm->len)); if (rkm->key) self->key = cfl_PyBin( _FromStringAndSize(rkm->key, rkm->key_len)); self->partition = rkm->partition; self->offset = rkm->offset; self->timestamp = rd_kafka_message_timestamp(rkm, &self->tstype); return (PyObject *)self; } /**************************************************************************** * * * TopicPartition * * * * ****************************************************************************/ static int TopicPartition_clear (TopicPartition *self) { if (self->topic) { free(self->topic); self->topic = NULL; } if (self->error) { Py_DECREF(self->error); self->error = NULL; } return 0; } static void TopicPartition_setup (TopicPartition *self, const char *topic, int partition, long long offset, rd_kafka_resp_err_t err) { self->topic = strdup(topic); self->partition = partition; self->offset = offset; self->error = KafkaError_new_or_None(err, NULL); } static void TopicPartition_dealloc (TopicPartition *self) { PyObject_GC_UnTrack(self); TopicPartition_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } static int TopicPartition_init (PyObject *self, PyObject *args, PyObject *kwargs) { const char *topic; int partition = RD_KAFKA_PARTITION_UA; long long offset = RD_KAFKA_OFFSET_INVALID; static char *kws[] = { "topic", "partition", "offset", NULL }; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|iL", kws, &topic, &partition, &offset)) return -1; TopicPartition_setup((TopicPartition *)self, topic, partition, offset, 0); return 0; } static PyObject *TopicPartition_new (PyTypeObject *type, PyObject *args, PyObject *kwargs) { PyObject *self = type->tp_alloc(type, 1); return self; } static int TopicPartition_traverse (TopicPartition *self, visitproc visit, void *arg) { if (self->error) Py_VISIT(self->error); return 0; } static PyMemberDef TopicPartition_members[] = { { "topic", T_STRING, offsetof(TopicPartition, topic), READONLY, ":py:attribute:topic - Topic name (string)" }, { "partition", T_INT, offsetof(TopicPartition, partition), 0, ":py:attribute: Partition number (int)" }, { "offset", T_LONGLONG, offsetof(TopicPartition, offset), 0, " :py:attribute: Offset (long)\n" "Either an absolute offset (>=0) or a logical offset:" " :py:const:`OFFSET_BEGINNING`," " :py:const:`OFFSET_END`," " :py:const:`OFFSET_STORED`," " :py:const:`OFFSET_INVALID`" }, { "error", T_OBJECT, offsetof(TopicPartition, error), READONLY, ":py:attribute: Indicates an error (with :py:class:`KafkaError`) unless None." }, { NULL } }; static PyObject *TopicPartition_str0 (TopicPartition *self) { PyObject *errstr = self->error == Py_None ? NULL : cfl_PyObject_Unistr(self->error); PyObject *ret; char offset_str[40]; snprintf(offset_str, sizeof(offset_str), "%"PRId64"", self->offset); ret = cfl_PyUnistr( _FromFormat("TopicPartition{topic=%s,partition=%"PRId32 ",offset=%s,error=%s}", self->topic, self->partition, offset_str, errstr ? 
cfl_PyUnistr_AsUTF8(errstr) : "None")); if (errstr) Py_DECREF(errstr); return ret; } static PyObject * TopicPartition_richcompare (TopicPartition *self, PyObject *o2, int op) { TopicPartition *a = self, *b; int tr, pr; int r; PyObject *result; if (Py_TYPE(o2) != Py_TYPE(self)) { PyErr_SetNone(PyExc_NotImplementedError); return NULL; } b = (TopicPartition *)o2; tr = strcmp(a->topic, b->topic); pr = a->partition - b->partition; switch (op) { case Py_LT: r = tr < 0 || (tr == 0 && pr < 0); break; case Py_LE: r = tr < 0 || (tr == 0 && pr <= 0); break; case Py_EQ: r = (tr == 0 && pr == 0); break; case Py_NE: r = (tr != 0 || pr != 0); break; case Py_GT: r = tr > 0 || (tr == 0 && pr > 0); break; case Py_GE: r = tr > 0 || (tr == 0 && pr >= 0); break; default: r = 0; break; } result = r ? Py_True : Py_False; Py_INCREF(result); return result; } static long TopicPartition_hash (TopicPartition *self) { PyObject *topic = cfl_PyUnistr(_FromString(self->topic)); long r = PyObject_Hash(topic) ^ self->partition; Py_DECREF(topic); return r; } PyTypeObject TopicPartitionType = { PyVarObject_HEAD_INIT(NULL, 0) "cimpl.TopicPartition", /*tp_name*/ sizeof(TopicPartition), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)TopicPartition_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ (reprfunc)TopicPartition_str0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ (hashfunc)TopicPartition_hash, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ PyObject_GenericGetAttr, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ "TopicPartition is a generic type to hold a single partition and " "various information about it.\n" "\n" "It is typically used to provide a list of topics or partitions for " "various operations, such as :py:func:`Consumer.assign()`.\n" "\n" ".. py:function:: TopicPartition(topic, [partition], [offset])\n" "\n" " Instantiate a TopicPartition object.\n" "\n" " :param string topic: Topic name\n" " :param int partition: Partition id\n" " :param int offset: Initial partition offset\n" " :rtype: TopicPartition\n" "\n" "\n", /*tp_doc*/ (traverseproc)TopicPartition_traverse, /* tp_traverse */ (inquiry)TopicPartition_clear, /* tp_clear */ (richcmpfunc)TopicPartition_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ 0, /* tp_methods */ TopicPartition_members,/* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ TopicPartition_init, /* tp_init */ 0, /* tp_alloc */ TopicPartition_new /* tp_new */ }; /** * @brief Internal factory to create a TopicPartition object. */ static PyObject *TopicPartition_new0 (const char *topic, int partition, long long offset, rd_kafka_resp_err_t err) { TopicPartition *self; self = (TopicPartition *)TopicPartitionType.tp_new( &TopicPartitionType, NULL, NULL); TopicPartition_setup(self, topic, partition, offset, err); return (PyObject *)self; } /** * @brief Convert C rd_kafka_topic_partition_list_t to Python list(TopicPartition). * * @returns The new Python list object. 
*/ PyObject *c_parts_to_py (const rd_kafka_topic_partition_list_t *c_parts) { PyObject *parts; size_t i; parts = PyList_New(c_parts->cnt); for (i = 0 ; i < c_parts->cnt ; i++) { const rd_kafka_topic_partition_t *rktpar = &c_parts->elems[i]; PyList_SET_ITEM(parts, i, TopicPartition_new0( rktpar->topic, rktpar->partition, rktpar->offset, rktpar->err)); } return parts; } /** * @brief Convert Python list(TopicPartition) to C rd_kafka_topic_partition_list_t. * * @returns The new C list on success or NULL on error. */ rd_kafka_topic_partition_list_t *py_to_c_parts (PyObject *plist) { rd_kafka_topic_partition_list_t *c_parts; size_t i; if (!PyList_Check(plist)) { PyErr_SetString(PyExc_TypeError, "requires list of TopicPartition"); return NULL; } c_parts = rd_kafka_topic_partition_list_new((int)PyList_Size(plist)); for (i = 0 ; i < PyList_Size(plist) ; i++) { TopicPartition *tp = (TopicPartition *) PyList_GetItem(plist, i); if (PyObject_Type((PyObject *)tp) != (PyObject *)&TopicPartitionType) { PyErr_Format(PyExc_TypeError, "expected %s", TopicPartitionType.tp_name); rd_kafka_topic_partition_list_destroy(c_parts); return NULL; } rd_kafka_topic_partition_list_add(c_parts, tp->topic, tp->partition)->offset = tp->offset; } return c_parts; } /**************************************************************************** * * * Common callbacks * * * * ****************************************************************************/ static void error_cb (rd_kafka_t *rk, int err, const char *reason, void *opaque) { Handle *h = opaque; PyObject *eo, *result; CallState *cs; cs = CallState_get(h); if (!h->error_cb) { /* No callback defined */ goto done; } eo = KafkaError_new0(err, "%s", reason); result = PyObject_CallFunctionObjArgs(h->error_cb, eo, NULL); Py_DECREF(eo); if (result) Py_DECREF(result); else { CallState_crash(cs); rd_kafka_yield(h->rk); } done: CallState_resume(cs); } static int stats_cb(rd_kafka_t *rk, char *json, size_t json_len, void *opaque) { Handle *h = opaque; PyObject *eo = NULL, *result = NULL; CallState *cs = NULL; cs = CallState_get(h); if (json_len == 0) { /* No data returned*/ goto done; } eo = Py_BuildValue("s", json); result = PyObject_CallFunctionObjArgs(h->stats_cb, eo, NULL); Py_DECREF(eo); if (result) Py_DECREF(result); else { CallState_crash(cs); rd_kafka_yield(h->rk); } done: CallState_resume(cs); return 0; } /**************************************************************************** * * * Common helpers * * * * ****************************************************************************/ /** * Clear Python object references in Handle */ void Handle_clear (Handle *h) { if (h->error_cb) Py_DECREF(h->error_cb); if (h->stats_cb) Py_DECREF(h->stats_cb); if (h->initiated) PyThread_delete_key(h->tlskey); } /** * GC traversal for Python object references */ int Handle_traverse (Handle *h, visitproc visit, void *arg) { if (h->error_cb) Py_VISIT(h->error_cb); if (h->stats_cb) Py_VISIT(h->stats_cb); return 0; } /** * Populate topic conf from provided dict. * * Will raise an exception on error and return -1, or returns 0 on success. 
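 *
 * Called from common_conf_setup() for the "default.topic.config" key, i.e.
 * a constructor config such as
 *   {"default.topic.config": {"auto.offset.reset": "smallest"}}
 * ends up here (the property shown is only an illustrative example).
 * Sketch of the call site:
 *
 *   if (populate_topic_conf(tconf, k, vo) == -1)
 *           return NULL;   an exception has already been raised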
*/ static int populate_topic_conf (rd_kafka_topic_conf_t *tconf, const char *what, PyObject *dict) { Py_ssize_t pos = 0; PyObject *ko, *vo; if (!PyDict_Check(dict)) { cfl_PyErr_Format(RD_KAFKA_RESP_ERR__INVALID_ARG, "%s: requires a dict", what); return -1; } while (PyDict_Next(dict, &pos, &ko, &vo)) { PyObject *ks; PyObject *vs; const char *k; const char *v; char errstr[256]; if (!(ks = cfl_PyObject_Unistr(ko))) { PyErr_SetString(PyExc_TypeError, "expected configuration property " "value as type unicode string"); return -1; } if (!(vs = cfl_PyObject_Unistr(vo))) { PyErr_SetString(PyExc_TypeError, "expected configuration property " "value as type unicode string"); Py_DECREF(ks); return -1; } k = cfl_PyUnistr_AsUTF8(ks); v = cfl_PyUnistr_AsUTF8(vs); if (rd_kafka_topic_conf_set(tconf, k, v, errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK) { cfl_PyErr_Format(RD_KAFKA_RESP_ERR__INVALID_ARG, "%s: %s", what, errstr); Py_DECREF(ks); Py_DECREF(vs); return -1; } Py_DECREF(ks); Py_DECREF(vs); } return 0; } /** * @brief Set single special producer config value. * * @returns 1 if handled, 0 if unknown, or -1 on failure (exception raised). */ static int producer_conf_set_special (Handle *self, rd_kafka_conf_t *conf, rd_kafka_topic_conf_t *tconf, const char *name, PyObject *valobj) { PyObject *vs; const char *val; if (!strcasecmp(name, "on_delivery")) { if (!PyCallable_Check(valobj)) { cfl_PyErr_Format( RD_KAFKA_RESP_ERR__INVALID_ARG, "%s requires a callable " "object", name); return -1; } self->u.Producer.default_dr_cb = valobj; Py_INCREF(self->u.Producer.default_dr_cb); return 1; } else if (!strcasecmp(name, "partitioner") || !strcasecmp(name, "partitioner_callback")) { if ((vs = cfl_PyObject_Unistr(valobj))) { /* Use built-in C partitioners, * based on their name. */ val = cfl_PyUnistr_AsUTF8(vs); if (!strcmp(val, "random")) rd_kafka_topic_conf_set_partitioner_cb( tconf, rd_kafka_msg_partitioner_random); else if (!strcmp(val, "consistent")) rd_kafka_topic_conf_set_partitioner_cb( tconf, rd_kafka_msg_partitioner_consistent); else if (!strcmp(val, "consistent_random")) rd_kafka_topic_conf_set_partitioner_cb( tconf, rd_kafka_msg_partitioner_consistent_random); else { cfl_PyErr_Format( RD_KAFKA_RESP_ERR__INVALID_ARG, "unknown builtin partitioner: %s " "(available: random, consistent, consistent_random)", val); Py_DECREF(vs); return -1; } Py_DECREF(vs); } else { /* Custom partitioner (Python callback) */ if (!PyCallable_Check(valobj)) { cfl_PyErr_Format( RD_KAFKA_RESP_ERR__INVALID_ARG, "%s requires a callable " "object", name); return -1; } /* FIXME: Error out until GIL+rdkafka lock-ordering is fixed. */ if (1) { cfl_PyErr_Format( RD_KAFKA_RESP_ERR__NOT_IMPLEMENTED, "custom partitioner support not yet implemented"); return -1; } if (self->u.Producer.partitioner_cb) Py_DECREF(self->u.Producer.partitioner_cb); self->u.Producer.partitioner_cb = valobj; Py_INCREF(self->u.Producer.partitioner_cb); /* Use trampoline to call Python code. */ rd_kafka_topic_conf_set_partitioner_cb(tconf, Producer_partitioner_cb); } return 1; } else if (!strcmp(name, "delivery.report.only.error")) { /* Since we allocate msgstate for each produced message * with a callback we can't use delivery.report.only.error * as-is, as we wouldn't be able to ever free those msgstates. * Instead we shortcut this setting in the Python client, * providing the same functionality from dr_msg_cb trampoline. 
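 *
 * The equivalent check is performed in dr_msg_cb() in Producer.c, roughly:
 *
 *   if (self->u.Producer.dr_only_error && !rkm->err)
 *           goto done;   successful deliveries skip the Python callback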
*/ if (!PyBool_Check(valobj)) { cfl_PyErr_Format( RD_KAFKA_RESP_ERR__INVALID_ARG, "%s requires bool", name); return -1; } self->u.Producer.dr_only_error = valobj == Py_True; return 1; } return 0; /* Not handled */ } /** * @brief Set single special consumer config value. * * @returns 1 if handled, 0 if unknown, or -1 on failure (exception raised). */ static int consumer_conf_set_special (Handle *self, rd_kafka_conf_t *conf, rd_kafka_topic_conf_t *tconf, const char *name, PyObject *valobj) { if (!strcasecmp(name, "on_commit")) { if (!PyCallable_Check(valobj)) { cfl_PyErr_Format( RD_KAFKA_RESP_ERR__INVALID_ARG, "%s requires a callable " "object", name); return -1; } self->u.Consumer.on_commit = valobj; Py_INCREF(self->u.Consumer.on_commit); return 1; } return 0; } /** * Common config setup for Kafka client handles. * * Returns a conf object on success or NULL on failure in which case * an exception has been raised. */ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype, Handle *h, PyObject *args, PyObject *kwargs) { rd_kafka_conf_t *conf; rd_kafka_topic_conf_t *tconf; Py_ssize_t pos = 0; PyObject *ko, *vo; int32_t (*partitioner_cb) (const rd_kafka_topic_t *, const void *, size_t, int32_t, void *, void *) = partitioner_cb; if (!kwargs) { /* If no kwargs, fall back on single dict arg, if any. */ if (!args || !PyTuple_Check(args) || PyTuple_Size(args) < 1 || !PyDict_Check((kwargs = PyTuple_GetItem(args, 0)))) { PyErr_SetString(PyExc_TypeError, "expected configuration dict"); return NULL; } } conf = rd_kafka_conf_new(); tconf = rd_kafka_topic_conf_new(); /* Convert kwargs dict to config key-value pairs. */ while (PyDict_Next(kwargs, &pos, &ko, &vo)) { PyObject *ks; PyObject *vs = NULL; const char *k; const char *v; char errstr[256]; int r; if (!(ks = cfl_PyObject_Unistr(ko))) { PyErr_SetString(PyExc_TypeError, "expected configuration property name " "as type unicode string"); rd_kafka_topic_conf_destroy(tconf); rd_kafka_conf_destroy(conf); return NULL; } k = cfl_PyUnistr_AsUTF8(ks); if (!strcmp(k, "default.topic.config")) { if (populate_topic_conf(tconf, k, vo) == -1) { Py_DECREF(ks); rd_kafka_topic_conf_destroy(tconf); rd_kafka_conf_destroy(conf); return NULL; } Py_DECREF(ks); continue; } else if (!strcmp(k, "error_cb")) { if (!PyCallable_Check(vo)) { PyErr_SetString(PyExc_TypeError, "expected error_cb property " "as a callable function"); rd_kafka_topic_conf_destroy(tconf); rd_kafka_conf_destroy(conf); Py_DECREF(ks); return NULL; } if (h->error_cb) { Py_DECREF(h->error_cb); h->error_cb = NULL; } if (vo != Py_None) { h->error_cb = vo; Py_INCREF(h->error_cb); } Py_DECREF(ks); continue; } else if (!strcmp(k, "stats_cb")) { if (!PyCallable_Check(vo)) { PyErr_SetString(PyExc_TypeError, "expected stats_cb property " "as a callable function"); rd_kafka_topic_conf_destroy(tconf); rd_kafka_conf_destroy(conf); Py_DECREF(ks); return NULL; } if (h->stats_cb) { Py_DECREF(h->stats_cb); h->stats_cb = NULL; } if (vo != Py_None) { h->stats_cb = vo; Py_INCREF(h->stats_cb); } Py_DECREF(ks); continue; } /* Special handling for certain config keys. */ if (ktype == RD_KAFKA_PRODUCER) r = producer_conf_set_special(h, conf, tconf, k, vo); else r = consumer_conf_set_special(h, conf, tconf, k, vo); if (r == -1) { /* Error */ Py_DECREF(ks); rd_kafka_topic_conf_destroy(tconf); rd_kafka_conf_destroy(conf); return NULL; } else if (r == 1) { /* Handled */ continue; } /* * Pass configuration property through to librdkafka. 
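 *
 * For example (the values shown are purely illustrative), a constructor
 * argument of {"bootstrap.servers": "localhost:9092"} is stringified below
 * and forwarded as:
 *
 *   rd_kafka_conf_set(conf, "bootstrap.servers", "localhost:9092",
 *                     errstr, sizeof(errstr));
 *
 * A value of None is passed through as NULL.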
*/ if (vo == Py_None) { v = NULL; } else { if (!(vs = cfl_PyObject_Unistr(vo))) { PyErr_SetString(PyExc_TypeError, "expected configuration " "property value as type " "unicode string"); rd_kafka_topic_conf_destroy(tconf); rd_kafka_conf_destroy(conf); Py_DECREF(ks); return NULL; } v = cfl_PyUnistr_AsUTF8(vs); } if (rd_kafka_conf_set(conf, k, v, errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK) { cfl_PyErr_Format(RD_KAFKA_RESP_ERR__INVALID_ARG, "%s", errstr); rd_kafka_topic_conf_destroy(tconf); rd_kafka_conf_destroy(conf); Py_XDECREF(vs); Py_DECREF(ks); return NULL; } Py_XDECREF(vs); Py_DECREF(ks); } if (h->error_cb) rd_kafka_conf_set_error_cb(conf, error_cb); if (h->stats_cb) rd_kafka_conf_set_stats_cb(conf, stats_cb); rd_kafka_topic_conf_set_opaque(tconf, h); rd_kafka_conf_set_default_topic_conf(conf, tconf); rd_kafka_conf_set_opaque(conf, h); h->tlskey = PyThread_create_key(); h->initiated = 1; return conf; } /** * @brief Initialiase a CallState and unlock the GIL prior to a * possibly blocking external call. */ void CallState_begin (Handle *h, CallState *cs) { cs->thread_state = PyEval_SaveThread(); assert(cs->thread_state != NULL); cs->crashed = 0; PyThread_set_key_value(h->tlskey, cs); } /** * @brief Relock the GIL after external call is done. * @returns 0 if a Python signal was raised or a callback crashed, else 1. */ int CallState_end (Handle *h, CallState *cs) { PyThread_delete_key_value(h->tlskey); PyEval_RestoreThread(cs->thread_state); if (PyErr_CheckSignals() == -1 || cs->crashed) return 0; return 1; } /** * @brief Get the current thread's CallState and re-locks the GIL. */ CallState *CallState_get (Handle *h) { CallState *cs = PyThread_get_key_value(h->tlskey); assert(cs != NULL); assert(cs->thread_state != NULL); PyEval_RestoreThread(cs->thread_state); cs->thread_state = NULL; return cs; } /** * @brief Un-locks the GIL to resume blocking external call. */ void CallState_resume (CallState *cs) { assert(cs->thread_state == NULL); cs->thread_state = PyEval_SaveThread(); } /** * @brief Indicate that call crashed. */ void CallState_crash (CallState *cs) { cs->crashed++; } /**************************************************************************** * * * Base * * * * ****************************************************************************/ static PyObject *libversion (PyObject *self, PyObject *args) { return Py_BuildValue("si", rd_kafka_version_str(), rd_kafka_version()); } static PyObject *version (PyObject *self, PyObject *args) { return Py_BuildValue("si", "0.11.0", 0x000b0000); } static PyMethodDef cimpl_methods[] = { {"libversion", libversion, METH_NOARGS, " Retrieve librdkafka version string and integer\n" "\n" " :returns: (version_string, version_int) tuple\n" " :rtype: tuple(str,int)\n" "\n" }, {"version", version, METH_NOARGS, " Retrieve module version string and integer\n" "\n" " :returns: (version_string, version_int) tuple\n" " :rtype: tuple(str,int)\n" "\n" }, { NULL } }; /** * @brief Add librdkafka error enums to KafkaError's type dict. * @returns an updated doc string containing all error constants. */ static char *KafkaError_add_errs (PyObject *dict, const char *origdoc) { const struct rd_kafka_err_desc *descs; size_t cnt; size_t i; char *doc; size_t dof = 0, dsize; /* RST grid table column widths */ #define _COL1_W 50 #define _COL2_W 100 /* Must be larger than COL1 */ char dash[_COL2_W], eq[_COL2_W]; rd_kafka_get_err_descs(&descs, &cnt); memset(dash, '-', sizeof(dash)); memset(eq, '=', sizeof(eq)); /* Setup output doc buffer. 
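 *
 * Size estimate: the original doc string plus ~500 bytes for the RST table
 * header and ~200 bytes per error description; _PRINT() below grows the
 * buffer if a row pushes past that estimate.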
*/ dof = strlen(origdoc); dsize = dof + 500 + (cnt * 200); doc = malloc(dsize); memcpy(doc, origdoc, dof+1); #define _PRINT(...) do { \ char tmpdoc[512]; \ size_t _len; \ _len = snprintf(tmpdoc, sizeof(tmpdoc), __VA_ARGS__); \ if (_len > sizeof(tmpdoc)) _len = sizeof(tmpdoc)-1; \ if (dof + _len >= dsize) { \ dsize += 2; \ doc = realloc(doc, dsize); \ } \ memcpy(doc+dof, tmpdoc, _len+1); \ dof += _len; \ } while (0) /* Error constant table header (RST grid table) */ _PRINT("Error and event constants:\n\n" "+-%.*s-+-%.*s-+\n" "| %-*.*s | %-*.*s |\n" "+=%.*s=+=%.*s=+\n", _COL1_W, dash, _COL2_W, dash, _COL1_W, _COL1_W, "Constant", _COL2_W, _COL2_W, "Description", _COL1_W, eq, _COL2_W, eq); for (i = 0 ; i < cnt ; i++) { PyObject *code; if (!descs[i].desc) continue; code = PyLong_FromLong(descs[i].code); PyDict_SetItemString(dict, descs[i].name, code); Py_DECREF(code); _PRINT("| %-*.*s | %-*.*s |\n" "+-%.*s-+-%.*s-+\n", _COL1_W, _COL1_W, descs[i].name, _COL2_W, _COL2_W, descs[i].desc, _COL1_W, dash, _COL2_W, dash); } _PRINT("\n"); return doc; // FIXME: leak } #ifdef PY3 static struct PyModuleDef cimpl_moduledef = { PyModuleDef_HEAD_INIT, "cimpl", /* m_name */ "Confluent's Apache Kafka Python client (C implementation)", /* m_doc */ -1, /* m_size */ cimpl_methods, /* m_methods */ }; #endif static PyObject *_init_cimpl (void) { PyObject *m; if (PyType_Ready(&KafkaErrorType) < 0) return NULL; if (PyType_Ready(&MessageType) < 0) return NULL; if (PyType_Ready(&TopicPartitionType) < 0) return NULL; if (PyType_Ready(&ProducerType) < 0) return NULL; if (PyType_Ready(&ConsumerType) < 0) return NULL; #ifdef PY3 m = PyModule_Create(&cimpl_moduledef); #else m = Py_InitModule3("cimpl", cimpl_methods, "Confluent's Apache Kafka Python client (C implementation)"); #endif if (!m) return NULL; Py_INCREF(&KafkaErrorType); KafkaErrorType.tp_doc = KafkaError_add_errs(KafkaErrorType.tp_dict, KafkaErrorType.tp_doc); PyModule_AddObject(m, "KafkaError", (PyObject *)&KafkaErrorType); Py_INCREF(&MessageType); PyModule_AddObject(m, "Message", (PyObject *)&MessageType); Py_INCREF(&TopicPartitionType); PyModule_AddObject(m, "TopicPartition", (PyObject *)&TopicPartitionType); Py_INCREF(&ProducerType); PyModule_AddObject(m, "Producer", (PyObject *)&ProducerType); Py_INCREF(&ConsumerType); PyModule_AddObject(m, "Consumer", (PyObject *)&ConsumerType); KafkaException = PyErr_NewExceptionWithDoc( "cimpl.KafkaException", "Kafka exception that wraps the :py:class:`KafkaError` " "class.\n" "\n" "Use ``exception.args[0]`` to extract the " ":py:class:`KafkaError` object\n" "\n", NULL, NULL); Py_INCREF(KafkaException); PyModule_AddObject(m, "KafkaException", KafkaException); PyModule_AddIntConstant(m, "TIMESTAMP_NOT_AVAILABLE", RD_KAFKA_TIMESTAMP_NOT_AVAILABLE); PyModule_AddIntConstant(m, "TIMESTAMP_CREATE_TIME", RD_KAFKA_TIMESTAMP_CREATE_TIME); PyModule_AddIntConstant(m, "TIMESTAMP_LOG_APPEND_TIME", RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME); PyModule_AddIntConstant(m, "OFFSET_BEGINNING", RD_KAFKA_OFFSET_BEGINNING); PyModule_AddIntConstant(m, "OFFSET_END", RD_KAFKA_OFFSET_END); PyModule_AddIntConstant(m, "OFFSET_STORED", RD_KAFKA_OFFSET_STORED); PyModule_AddIntConstant(m, "OFFSET_INVALID", RD_KAFKA_OFFSET_INVALID); return m; } #ifdef PY3 PyMODINIT_FUNC PyInit_cimpl (void) { return _init_cimpl(); } #else PyMODINIT_FUNC initcimpl (void) { _init_cimpl(); } #endif confluent-kafka-0.11.0/confluent_kafka/src/confluent_kafka.h0000644000076600000240000001541113071245136024507 0ustar magnusstaff00000000000000/** * Copyright 2016 Confluent Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #if PY_MAJOR_VERSION >= 3 #define PY3 #include #endif /** * librdkafka feature detection */ #ifdef RD_KAFKA_V_TIMESTAMP #define HAVE_PRODUCEV 1 /* rd_kafka_producev() */ #endif /**************************************************************************** * * * Python 2 & 3 portability * * Binary data (we call it cfl_PyBin): * Python 2: string * Python 3: bytes * * Unicode Strings (we call it cfl_PyUnistr): * Python 2: unicode * Python 3: strings * ****************************************************************************/ #ifdef PY3 /* Python 3 */ /** * @brief Binary type, use as cfl_PyBin(_X(A,B)) where _X() is the type-less * suffix of a PyBytes/Str_X() function */ #define cfl_PyBin(X) PyBytes ## X /** * @brief Unicode type, same usage as PyBin() */ #define cfl_PyUnistr(X) PyUnicode ## X /** * @returns Unicode Python object as char * in UTF-8 encoding */ #define cfl_PyUnistr_AsUTF8(X) PyUnicode_AsUTF8(X) /** * @returns Unicode Python string object */ #define cfl_PyObject_Unistr(X) PyObject_Str(X) #else /* Python 2 */ /* See comments above */ #define cfl_PyBin(X) PyString ## X #define cfl_PyUnistr(X) PyUnicode ## X #define cfl_PyUnistr_AsUTF8(X) PyBytes_AsString(PyUnicode_AsUTF8String(X)) #define cfl_PyObject_Unistr(X) PyObject_Unicode(X) #endif /**************************************************************************** * * * KafkaError * * * * ****************************************************************************/ extern PyObject *KafkaException; PyObject *KafkaError_new0 (rd_kafka_resp_err_t err, const char *fmt, ...); PyObject *KafkaError_new_or_None (rd_kafka_resp_err_t err, const char *str); /** * @brief Raise an exception using KafkaError. * \p err and and \p ... (string representation of error) is set on the returned * KafkaError object. */ #define cfl_PyErr_Format(err,...) 
do { \ PyObject *_eo = KafkaError_new0(err, __VA_ARGS__); \ PyErr_SetObject(KafkaException, _eo); \ Py_DECREF(_eo); \ } while (0) /**************************************************************************** * * * Common instance handle for both Producer and Consumer * * * * ****************************************************************************/ typedef struct { PyObject_HEAD rd_kafka_t *rk; PyObject *error_cb; PyObject *stats_cb; int initiated; int tlskey; /* Thread-Local-Storage key */ rd_kafka_type_t type; /* Producer or consumer */ union { /** * Producer */ struct { PyObject *default_dr_cb; PyObject *partitioner_cb; /**< Registered Python partitioner */ int32_t (*c_partitioner_cb) ( const rd_kafka_topic_t *, const void *, size_t, int32_t, void *, void *); /**< Fallback C partitioner*/ int dr_only_error; /**< delivery.report.only.error */ } Producer; /** * Consumer */ struct { int rebalance_assigned; /* Rebalance: Callback performed assign() call.*/ PyObject *on_assign; /* Rebalance: on_assign callback */ PyObject *on_revoke; /* Rebalance: on_revoke callback */ PyObject *on_commit; /* Commit callback */ } Consumer; } u; } Handle; void Handle_clear (Handle *h); int Handle_traverse (Handle *h, visitproc visit, void *arg); /** * @brief Current thread's state for "blocking" calls to librdkafka. */ typedef struct { PyThreadState *thread_state; int crashed; /* Callback crashed */ } CallState; /** * @brief Initialiase a CallState and unlock the GIL prior to a * possibly blocking external call. */ void CallState_begin (Handle *h, CallState *cs); /** * @brief Relock the GIL after external call is done, remove TLS state. * @returns 0 if a Python signal was raised or a callback crashed, else 1. */ int CallState_end (Handle *h, CallState *cs); /** * @brief Get the current thread's CallState and re-locks the GIL. */ CallState *CallState_get (Handle *h); /** * @brief Un-locks the GIL to resume blocking external call. */ void CallState_resume (CallState *cs); /** * @brief Indicate that call crashed. 
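 *
 * Typical lifecycle around a blocking librdkafka call (a sketch mirroring
 * Producer_poll0() in Producer.c):
 *
 *   CallState cs;
 *   CallState_begin(h, &cs);           releases the GIL, stores cs in TLS
 *   r = rd_kafka_poll(h->rk, tmout);   callbacks re-enter via CallState_get()
 *                                      and CallState_resume()
 *   if (!CallState_end(h, &cs))        re-acquires the GIL
 *           return NULL;               a callback crashed or a signal was raised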
*/ void CallState_crash (CallState *cs); /**************************************************************************** * * * TopicPartition * * * * ****************************************************************************/ typedef struct { PyObject_HEAD char *topic; int partition; int64_t offset; PyObject *error; } TopicPartition; extern PyTypeObject TopicPartitionType; /**************************************************************************** * * * Common * * * * ****************************************************************************/ rd_kafka_conf_t *common_conf_setup (rd_kafka_type_t ktype, Handle *h, PyObject *args, PyObject *kwargs); PyObject *c_parts_to_py (const rd_kafka_topic_partition_list_t *c_parts); rd_kafka_topic_partition_list_t *py_to_c_parts (PyObject *plist); /**************************************************************************** * * * Message * * * * ****************************************************************************/ /** * @brief confluent_kafka.Message object */ typedef struct { PyObject_HEAD PyObject *topic; PyObject *value; PyObject *key; PyObject *error; int32_t partition; int64_t offset; int64_t timestamp; rd_kafka_timestamp_type_t tstype; } Message; extern PyTypeObject MessageType; PyObject *Message_new0 (const Handle *handle, const rd_kafka_message_t *rkm); PyObject *Message_error (Message *self, PyObject *ignore); /**************************************************************************** * * * Producer * * * * ****************************************************************************/ extern PyTypeObject ProducerType; int32_t Producer_partitioner_cb (const rd_kafka_topic_t *rkt, const void *keydata, size_t keylen, int32_t partition_cnt, void *rkt_opaque, void *msg_opaque); /**************************************************************************** * * * Consumer * * * * ****************************************************************************/ extern PyTypeObject ConsumerType; confluent-kafka-0.11.0/confluent_kafka/src/Consumer.c0000644000076600000240000005757213052344654023165 0ustar magnusstaff00000000000000/** * Copyright 2016 Confluent Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include "confluent_kafka.h" /**************************************************************************** * * * Consumer * * * * ****************************************************************************/ static int Consumer_clear (Handle *self) { if (self->u.Consumer.on_assign) { Py_DECREF(self->u.Consumer.on_assign); self->u.Consumer.on_assign = NULL; } if (self->u.Consumer.on_revoke) { Py_DECREF(self->u.Consumer.on_revoke); self->u.Consumer.on_revoke = NULL; } if (self->u.Consumer.on_commit) { Py_DECREF(self->u.Consumer.on_commit); self->u.Consumer.on_commit = NULL; } Handle_clear(self); return 0; } static void Consumer_dealloc (Handle *self) { PyObject_GC_UnTrack(self); Consumer_clear(self); if (self->rk) { CallState cs; CallState_begin(self, &cs); /* If application has not called c.close() then * rd_kafka_destroy() will, and that might trigger * callbacks to be called from consumer_close(). * This should probably be fixed in librdkafka, * or the application. */ rd_kafka_destroy(self->rk); CallState_end(self, &cs); } Py_TYPE(self)->tp_free((PyObject *)self); } static int Consumer_traverse (Handle *self, visitproc visit, void *arg) { if (self->u.Consumer.on_assign) Py_VISIT(self->u.Consumer.on_assign); if (self->u.Consumer.on_revoke) Py_VISIT(self->u.Consumer.on_revoke); if (self->u.Consumer.on_commit) Py_VISIT(self->u.Consumer.on_commit); Handle_traverse(self, visit, arg); return 0; } static PyObject *Consumer_subscribe (Handle *self, PyObject *args, PyObject *kwargs) { rd_kafka_topic_partition_list_t *topics; static char *kws[] = { "topics", "on_assign", "on_revoke", NULL }; PyObject *tlist, *on_assign = NULL, *on_revoke = NULL; Py_ssize_t pos = 0; rd_kafka_resp_err_t err; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OO", kws, &tlist, &on_assign, &on_revoke)) return NULL; if (!PyList_Check(tlist)) { PyErr_Format(PyExc_TypeError, "expected list of topic unicode strings"); return NULL; } if (on_assign && !PyCallable_Check(on_assign)) { PyErr_Format(PyExc_TypeError, "on_assign expects a callable"); return NULL; } if (on_revoke && !PyCallable_Check(on_revoke)) { PyErr_Format(PyExc_TypeError, "on_revoke expects a callable"); return NULL; } topics = rd_kafka_topic_partition_list_new((int)PyList_Size(tlist)); for (pos = 0 ; pos < PyList_Size(tlist) ; pos++) { PyObject *o = PyList_GetItem(tlist, pos); PyObject *uo; if (!(uo = cfl_PyObject_Unistr(o))) { PyErr_Format(PyExc_TypeError, "expected list of unicode strings"); rd_kafka_topic_partition_list_destroy(topics); return NULL; } rd_kafka_topic_partition_list_add(topics, cfl_PyUnistr_AsUTF8(uo), RD_KAFKA_PARTITION_UA); Py_DECREF(uo); } err = rd_kafka_subscribe(self->rk, topics); rd_kafka_topic_partition_list_destroy(topics); if (err) { cfl_PyErr_Format(err, "Failed to set subscription: %s", rd_kafka_err2str(err)); return NULL; } /* * Update rebalance callbacks */ if (self->u.Consumer.on_assign) { Py_DECREF(self->u.Consumer.on_assign); self->u.Consumer.on_assign = NULL; } if (on_assign) { self->u.Consumer.on_assign = on_assign; Py_INCREF(self->u.Consumer.on_assign); } if (self->u.Consumer.on_revoke) { Py_DECREF(self->u.Consumer.on_revoke); self->u.Consumer.on_revoke = NULL; } if (on_revoke) { self->u.Consumer.on_revoke = on_revoke; Py_INCREF(self->u.Consumer.on_revoke); } Py_RETURN_NONE; } static PyObject *Consumer_unsubscribe (Handle *self, PyObject *ignore) { rd_kafka_resp_err_t err; err = rd_kafka_unsubscribe(self->rk); if (err) { cfl_PyErr_Format(err, "Failed to remove subscription: %s", rd_kafka_err2str(err)); return NULL; } 
Py_RETURN_NONE; } static PyObject *Consumer_assign (Handle *self, PyObject *tlist) { rd_kafka_topic_partition_list_t *c_parts; rd_kafka_resp_err_t err; if (!(c_parts = py_to_c_parts(tlist))) return NULL; self->u.Consumer.rebalance_assigned++; err = rd_kafka_assign(self->rk, c_parts); rd_kafka_topic_partition_list_destroy(c_parts); if (err) { cfl_PyErr_Format(err, "Failed to set assignment: %s", rd_kafka_err2str(err)); return NULL; } Py_RETURN_NONE; } static PyObject *Consumer_unassign (Handle *self, PyObject *ignore) { rd_kafka_resp_err_t err; self->u.Consumer.rebalance_assigned++; err = rd_kafka_assign(self->rk, NULL); if (err) { cfl_PyErr_Format(err, "Failed to remove assignment: %s", rd_kafka_err2str(err)); return NULL; } Py_RETURN_NONE; } static PyObject *Consumer_assignment (Handle *self, PyObject *args, PyObject *kwargs) { PyObject *plist; rd_kafka_topic_partition_list_t *c_parts; rd_kafka_resp_err_t err; err = rd_kafka_assignment(self->rk, &c_parts); if (err) { cfl_PyErr_Format(err, "Failed to get assignment: %s", rd_kafka_err2str(err)); return NULL; } plist = c_parts_to_py(c_parts); rd_kafka_topic_partition_list_destroy(c_parts); return plist; } static PyObject *Consumer_commit (Handle *self, PyObject *args, PyObject *kwargs) { rd_kafka_resp_err_t err; PyObject *msg = NULL, *offsets = NULL, *async_o = NULL; rd_kafka_topic_partition_list_t *c_offsets; int async = 1; static char *kws[] = { "message", "offsets", "async",NULL }; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOO", kws, &msg, &offsets, &async_o)) return NULL; if (msg && offsets) { PyErr_SetString(PyExc_ValueError, "message and offsets are mutually exclusive"); return NULL; } if (async_o) async = PyObject_IsTrue(async_o); if (offsets) { if (!(c_offsets = py_to_c_parts(offsets))) return NULL; } else if (msg) { Message *m; if (PyObject_Type((PyObject *)msg) != (PyObject *)&MessageType) { PyErr_Format(PyExc_TypeError, "expected %s", MessageType.tp_name); return NULL; } m = (Message *)msg; c_offsets = rd_kafka_topic_partition_list_new(1); rd_kafka_topic_partition_list_add( c_offsets, cfl_PyUnistr_AsUTF8(m->topic), m->partition)->offset =m->offset + 1; } else { c_offsets = NULL; } err = rd_kafka_commit(self->rk, c_offsets, async); if (c_offsets) rd_kafka_topic_partition_list_destroy(c_offsets); if (err) { cfl_PyErr_Format(err, "Commit failed: %s", rd_kafka_err2str(err)); return NULL; } Py_RETURN_NONE; } static PyObject *Consumer_committed (Handle *self, PyObject *args, PyObject *kwargs) { PyObject *plist; rd_kafka_topic_partition_list_t *c_parts; rd_kafka_resp_err_t err; double tmout = -1.0f; static char *kws[] = { "partitions", "timeout", NULL }; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|d", kws, &plist, &tmout)) return NULL; if (!(c_parts = py_to_c_parts(plist))) return NULL; err = rd_kafka_committed(self->rk, c_parts, tmout >= 0 ? 
(int)(tmout * 1000.0f) : -1); if (err) { rd_kafka_topic_partition_list_destroy(c_parts); cfl_PyErr_Format(err, "Failed to get committed offsets: %s", rd_kafka_err2str(err)); return NULL; } plist = c_parts_to_py(c_parts); rd_kafka_topic_partition_list_destroy(c_parts); return plist; } static PyObject *Consumer_position (Handle *self, PyObject *args, PyObject *kwargs) { PyObject *plist; rd_kafka_topic_partition_list_t *c_parts; rd_kafka_resp_err_t err; static char *kws[] = { "partitions", NULL }; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kws, &plist)) return NULL; if (!(c_parts = py_to_c_parts(plist))) return NULL; err = rd_kafka_position(self->rk, c_parts); if (err) { rd_kafka_topic_partition_list_destroy(c_parts); cfl_PyErr_Format(err, "Failed to get position: %s", rd_kafka_err2str(err)); return NULL; } plist = c_parts_to_py(c_parts); rd_kafka_topic_partition_list_destroy(c_parts); return plist; } static PyObject *Consumer_get_watermark_offsets (Handle *self, PyObject *args, PyObject *kwargs) { TopicPartition *tp; rd_kafka_resp_err_t err; double tmout = -1.0f; int cached = 0; int64_t low = RD_KAFKA_OFFSET_INVALID, high = RD_KAFKA_OFFSET_INVALID; static char *kws[] = { "partition", "timeout", "cached", NULL }; PyObject *rtup; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|db", kws, (PyObject **)&tp, &tmout, &cached)) return NULL; if (PyObject_Type((PyObject *)tp) != (PyObject *)&TopicPartitionType) { PyErr_Format(PyExc_TypeError, "expected %s", TopicPartitionType.tp_name); return NULL; } if (cached) { err = rd_kafka_get_watermark_offsets(self->rk, tp->topic, tp->partition, &low, &high); } else { err = rd_kafka_query_watermark_offsets(self->rk, tp->topic, tp->partition, &low, &high, tmout >= 0 ? (int)(tmout * 1000.0f) : -1); } if (err) { cfl_PyErr_Format(err, "Failed to get watermark offsets: %s", rd_kafka_err2str(err)); return NULL; } rtup = PyTuple_New(2); PyTuple_SetItem(rtup, 0, PyLong_FromLongLong(low)); PyTuple_SetItem(rtup, 1, PyLong_FromLongLong(high)); return rtup; } static PyObject *Consumer_poll (Handle *self, PyObject *args, PyObject *kwargs) { double tmout = -1.0f; static char *kws[] = { "timeout", NULL }; rd_kafka_message_t *rkm; PyObject *msgobj; CallState cs; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|d", kws, &tmout)) return NULL; CallState_begin(self, &cs); rkm = rd_kafka_consumer_poll(self->rk, tmout >= 0 ? (int)(tmout * 1000.0f) : -1); if (!CallState_end(self, &cs)) return NULL; if (!rkm) Py_RETURN_NONE; msgobj = Message_new0(self, rkm); rd_kafka_message_destroy(rkm); return msgobj; } static PyObject *Consumer_close (Handle *self, PyObject *ignore) { CallState cs; CallState_begin(self, &cs); rd_kafka_consumer_close(self->rk); rd_kafka_destroy(self->rk); self->rk = NULL; if (!CallState_end(self, &cs)) return NULL; Py_RETURN_NONE; } static PyMethodDef Consumer_methods[] = { { "subscribe", (PyCFunction)Consumer_subscribe, METH_VARARGS|METH_KEYWORDS, ".. 
py:function:: subscribe(topics, [listener=None])\n" "\n" " Set subscription to supplied list of topics\n" " This replaces a previous subscription.\n" "\n" " Regexp pattern subscriptions are supported by prefixing " "the topic string with ``\"^\"``, e.g.::\n" "\n" " consumer.subscribe([\"^my_topic.*\", \"^another[0-9]-?[a-z]+$\", \"not_a_regex\"])\n" "\n" " :param list(str) topics: List of topics (strings) to subscribe to.\n" " :param callable on_assign: callback to provide handling of " "customized offsets on completion of a successful partition " "re-assignment.\n" " :param callable on_revoke: callback to provide handling of " "offset commits to a customized store on the start of a " "rebalance operation.\n" "\n" " :raises KafkaException:\n" "\n" "\n" ".. py:function:: on_assign(consumer, partitions)\n" ".. py:function:: on_revoke(consumer, partitions)\n" "\n" " :param Consumer consumer: Consumer instance.\n" " :param list(TopicPartition) partitions: Absolute list of partitions being assigned or revoked.\n" "\n" }, { "unsubscribe", (PyCFunction)Consumer_unsubscribe, METH_NOARGS, " Remove current subscription.\n" " :raises: KafkaException\n" "\n" }, { "poll", (PyCFunction)Consumer_poll, METH_VARARGS|METH_KEYWORDS, ".. py:function:: poll([timeout=None])\n" "\n" " Consume messages, calls callbacks and returns events.\n" "\n" " The application must check the returned :py:class:`Message` " "object's :py:func:`Message.error()` method to distinguish " "between proper messages (error() returns None), or an event or " "error (see error().code() for specifics).\n" "\n" " .. note: Callbacks may be called from this method, " "such as ``on_assign``, ``on_revoke``, et.al.\n" "\n" " :param float timeout: Maximum time to block waiting for message, event or callback.\n" " :returns: A Message object or None on timeout\n" " :rtype: :py:class:`Message` or None\n" "\n" }, { "assign", (PyCFunction)Consumer_assign, METH_O, ".. py:function:: assign(partitions)\n" "\n" " Set consumer partition assignment to the provided list of " ":py:class:`TopicPartition` and starts consuming.\n" "\n" " :param list(TopicPartition) partitions: List of topic+partitions and optionally initial offsets to start consuming.\n" "\n" }, { "unassign", (PyCFunction)Consumer_unassign, METH_NOARGS, " Removes the current partition assignment and stops consuming.\n" " :raises: KafkaException\n" "\n" }, { "assignment", (PyCFunction)Consumer_assignment, METH_VARARGS|METH_KEYWORDS, ".. py:function:: assignment()\n" "\n" " Returns the current partition assignment.\n" "\n" " :returns: List of assigned topic+partitions.\n" " :rtype: list(TopicPartition)\n" " :raises: KafkaException\n" "\n" }, { "commit", (PyCFunction)Consumer_commit, METH_VARARGS|METH_KEYWORDS, ".. py:function:: commit([message=None], [offsets=None], [async=True])\n" "\n" " Commit a message or a list of offsets.\n" "\n" " ``message`` and ``offsets`` are mutually exclusive, if neither is set " "the current partition assignment's offsets are used instead. " "The consumer relies on your use of this method if you have set 'enable.auto.commit' to False\n" "\n" " :param confluent_kafka.Message message: Commit message's offset+1.\n" " :param list(TopicPartition) offsets: List of topic+partitions+offsets to commit.\n" " :param bool async: Asynchronous commit, return immediately.\n" " :rtype: None\n" " :raises: KafkaException\n" "\n" }, { "committed", (PyCFunction)Consumer_committed, METH_VARARGS|METH_KEYWORDS, ".. 
py:function:: committed(partitions, [timeout=None])\n" "\n" " Retrieve committed offsets for the list of partitions.\n" "\n" " :param list(TopicPartition) partitions: List of topic+partitions " "to query for stored offsets.\n" " :param float timeout: Request timeout\n" " :returns: List of topic+partitions with offset and possibly error set.\n" " :rtype: list(TopicPartition)\n" " :raises: KafkaException\n" "\n" }, { "position", (PyCFunction)Consumer_position, METH_VARARGS|METH_KEYWORDS, ".. py:function:: position(partitions, [timeout=None])\n" "\n" " Retrieve current positions (offsets) for the list of partitions.\n" "\n" " :param list(TopicPartition) partitions: List of topic+partitions " "to return current offsets for. The current offset is the offset of the " "last consumed message + 1.\n" " :returns: List of topic+partitions with offset and possibly error set.\n" " :rtype: list(TopicPartition)\n" " :raises: KafkaException\n" "\n" }, { "get_watermark_offsets", (PyCFunction)Consumer_get_watermark_offsets, METH_VARARGS|METH_KEYWORDS, ".. py:function:: get_watermark_offsets(partition, [timeout=None], [cached=False])\n" "\n" " Retrieve low and high offsets for partition.\n" "\n" " :param TopicPartition partition: Topic+partition to return offsets for." " :param float timeout: Request timeout (when cached=False).\n" " :param bool cached: Instead of querying the broker used cached information. " "Cached values: The low offset is updated periodically (if statistics.interval.ms is set) while " "the high offset is updated on each message fetched from the broker for this partition." " :returns: Tuple of (low,high) on success or None on timeout.\n" " :rtype: tuple(int,int)\n" " :raises: KafkaException\n" "\n" }, { "close", (PyCFunction)Consumer_close, METH_NOARGS, "\n" " Close down and terminate the Kafka Consumer.\n" "\n" " Actions performed:\n" "\n" " - Stops consuming\n" " - Commits offsets - except if the consumer property 'enable.auto.commit' is set to False\n" " - Leave consumer group\n" "\n" " .. note: Registered callbacks may be called from this method, " "see :py:func::`poll()` for more info.\n" "\n" " :rtype: None\n" "\n" }, { NULL } }; static void Consumer_rebalance_cb (rd_kafka_t *rk, rd_kafka_resp_err_t err, rd_kafka_topic_partition_list_t *c_parts, void *opaque) { Handle *self = opaque; CallState *cs; cs = CallState_get(self); self->u.Consumer.rebalance_assigned = 0; if ((err == RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS && self->u.Consumer.on_assign) || (err == RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS && self->u.Consumer.on_revoke)) { PyObject *parts; PyObject *args, *result; /* Construct list of TopicPartition based on 'c_parts' */ parts = c_parts_to_py(c_parts); args = Py_BuildValue("(OO)", self, parts); Py_DECREF(parts); if (!args) { cfl_PyErr_Format(RD_KAFKA_RESP_ERR__FAIL, "Unable to build callback args"); CallState_crash(cs); CallState_resume(cs); return; } result = PyObject_CallObject( err == RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS ? self->u.Consumer.on_assign : self->u.Consumer.on_revoke, args); Py_DECREF(args); if (result) Py_DECREF(result); else { CallState_crash(cs); rd_kafka_yield(rk); } } /* Fallback: librdkafka needs the rebalance_cb to call assign() * to synchronize state, if the user did not do this from callback, * or there was no callback, or the callback failed, then we perform * that assign() call here instead. 
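 *
 * Whether the callback called assign() is tracked via the
 * u.Consumer.rebalance_assigned counter: it is zeroed above before the
 * Python callback runs and incremented by Consumer_assign() and
 * Consumer_unassign(), so a value of zero here means the application left
 * the (un)assignment to us.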
*/ if (!self->u.Consumer.rebalance_assigned) { if (err == RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS) rd_kafka_assign(rk, c_parts); else rd_kafka_assign(rk, NULL); } CallState_resume(cs); } static void Consumer_offset_commit_cb (rd_kafka_t *rk, rd_kafka_resp_err_t err, rd_kafka_topic_partition_list_t *c_parts, void *opaque) { Handle *self = opaque; PyObject *parts, *k_err, *args, *result; CallState *cs; if (!self->u.Consumer.on_commit) return; cs = CallState_get(self); /* Insantiate error object */ k_err = KafkaError_new_or_None(err, NULL); /* Construct list of TopicPartition based on 'c_parts' */ if (c_parts) parts = c_parts_to_py(c_parts); else parts = PyList_New(0); args = Py_BuildValue("(OO)", k_err, parts); Py_DECREF(k_err); Py_DECREF(parts); if (!args) { cfl_PyErr_Format(RD_KAFKA_RESP_ERR__FAIL, "Unable to build callback args"); CallState_crash(cs); CallState_resume(cs); return; } result = PyObject_CallObject(self->u.Consumer.on_commit, args); Py_DECREF(args); if (result) Py_DECREF(result); else { CallState_crash(cs); rd_kafka_yield(rk); } CallState_resume(cs); } static int Consumer_init (PyObject *selfobj, PyObject *args, PyObject *kwargs) { Handle *self = (Handle *)selfobj; char errstr[256]; rd_kafka_conf_t *conf; if (self->rk) { PyErr_SetString(PyExc_RuntimeError, "Consumer already __init__:ialized"); return -1; } self->type = RD_KAFKA_CONSUMER; if (!(conf = common_conf_setup(RD_KAFKA_CONSUMER, self, args, kwargs))) return -1; /* Exception raised by ..conf_setup() */ rd_kafka_conf_set_rebalance_cb(conf, Consumer_rebalance_cb); rd_kafka_conf_set_offset_commit_cb(conf, Consumer_offset_commit_cb); self->rk = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); if (!self->rk) { cfl_PyErr_Format(rd_kafka_last_error(), "Failed to create consumer: %s", errstr); rd_kafka_conf_destroy(conf); return -1; } rd_kafka_poll_set_consumer(self->rk); return 0; } static PyObject *Consumer_new (PyTypeObject *type, PyObject *args, PyObject *kwargs) { return type->tp_alloc(type, 0); } PyTypeObject ConsumerType = { PyVarObject_HEAD_INIT(NULL, 0) "cimpl.Consumer", /*tp_name*/ sizeof(Handle), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Consumer_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ "High-level Kafka Consumer\n" "\n" ".. py:function:: Consumer(**kwargs)\n" "\n" " Create new Consumer instance using provided configuration dict.\n" "\n" " Special configuration properties:\n" " ``on_commit``: Optional callback will be called when a commit " "request has succeeded or failed.\n" "\n" "\n" ".. 
py:function:: on_commit(err, partitions)\n" "\n" " :param Consumer consumer: Consumer instance.\n" " :param KafkaError err: Commit error object, or None on success.\n" " :param list(TopicPartition) partitions: List of partitions with " "their committed offsets or per-partition errors.\n" "\n" "\n", /*tp_doc*/ (traverseproc)Consumer_traverse, /* tp_traverse */ (inquiry)Consumer_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ Consumer_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ Consumer_init, /* tp_init */ 0, /* tp_alloc */ Consumer_new /* tp_new */ }; confluent-kafka-0.11.0/confluent_kafka/src/Producer.c0000644000076600000240000004245713122216424023140 0ustar magnusstaff00000000000000/** * Copyright 2016 Confluent Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "confluent_kafka.h" /** * @brief KNOWN ISSUES * * - Partitioners will cause a dead-lock with librdkafka, because: * GIL + topic lock in topic_new is different lock order than * topic lock in msg_partitioner + GIL. * This needs to be sorted out in librdkafka, preferably making the * partitioner run without any locks taken. * Until this is fixed the partitioner is ignored and librdkafka's * default will be used. * */ /**************************************************************************** * * * Producer * * * * ****************************************************************************/ /** * Per-message state. */ struct Producer_msgstate { Handle *self; PyObject *dr_cb; PyObject *partitioner_cb; }; /** * Create a new per-message state. * Returns NULL if neither dr_cb or partitioner_cb is set. 
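 *
 * Usage sketch (as in Producer_produce() below, with Producer_produce0()
 * taking the place of Producer_producev() on older librdkafka): the state
 * travels as the message opaque and is normally freed by dr_msg_cb() when
 * the delivery report fires; only on immediate produce failure does the
 * caller free it:
 *
 *   msgstate = Producer_msgstate_new(self, dr_cb, partitioner_cb);
 *   err = Producer_producev(self, topic, partition, value, value_len,
 *                           key, key_len, msgstate, timestamp);
 *   if (err && msgstate)
 *           Producer_msgstate_destroy(msgstate);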
*/ static __inline struct Producer_msgstate * Producer_msgstate_new (Handle *self, PyObject *dr_cb, PyObject *partitioner_cb) { struct Producer_msgstate *msgstate; if (!dr_cb && !partitioner_cb) return NULL; msgstate = calloc(1, sizeof(*msgstate)); msgstate->self = self; if (dr_cb) { msgstate->dr_cb = dr_cb; Py_INCREF(dr_cb); } if (partitioner_cb) { msgstate->partitioner_cb = partitioner_cb; Py_INCREF(partitioner_cb); } return msgstate; } static __inline void Producer_msgstate_destroy (struct Producer_msgstate *msgstate) { if (msgstate->dr_cb) Py_DECREF(msgstate->dr_cb); if (msgstate->partitioner_cb) Py_DECREF(msgstate->partitioner_cb); free(msgstate); } static int Producer_clear (Handle *self) { if (self->u.Producer.default_dr_cb) { Py_DECREF(self->u.Producer.default_dr_cb); self->u.Producer.default_dr_cb = NULL; } if (self->u.Producer.partitioner_cb) { Py_DECREF(self->u.Producer.partitioner_cb); self->u.Producer.partitioner_cb = NULL; } Handle_clear(self); return 0; } static void Producer_dealloc (Handle *self) { PyObject_GC_UnTrack(self); Producer_clear(self); if (self->rk) { CallState cs; CallState_begin(self, &cs); rd_kafka_destroy(self->rk); CallState_end(self, &cs); } Py_TYPE(self)->tp_free((PyObject *)self); } static int Producer_traverse (Handle *self, visitproc visit, void *arg) { if (self->u.Producer.default_dr_cb) Py_VISIT(self->u.Producer.default_dr_cb); if (self->u.Producer.partitioner_cb) Py_VISIT(self->u.Producer.partitioner_cb); Handle_traverse(self, visit, arg); return 0; } static void dr_msg_cb (rd_kafka_t *rk, const rd_kafka_message_t *rkm, void *opaque) { struct Producer_msgstate *msgstate = rkm->_private; Handle *self = opaque; CallState *cs; PyObject *args; PyObject *result; PyObject *msgobj; if (!msgstate) return; cs = CallState_get(self); if (!msgstate->dr_cb) { /* No callback defined */ goto done; } /* Skip callback if delivery.report.only.error=true */ if (self->u.Producer.dr_only_error && !rkm->err) goto done; msgobj = Message_new0(self, rkm); args = Py_BuildValue("(OO)", Message_error((Message *)msgobj, NULL), msgobj); Py_DECREF(msgobj); if (!args) { cfl_PyErr_Format(RD_KAFKA_RESP_ERR__FAIL, "Unable to build callback args"); CallState_crash(cs); goto done; } result = PyObject_CallObject(msgstate->dr_cb, args); Py_DECREF(args); if (result) Py_DECREF(result); else { CallState_crash(cs); rd_kafka_yield(rk); } done: Producer_msgstate_destroy(msgstate); CallState_resume(cs); } /** * FIXME: The partitioner is currently broken due to threading/GIL issues. 
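 *
 * See the KNOWN ISSUES note at the top of this file: the GIL and the
 * rdkafka topic lock are taken in opposite orders, so
 * producer_conf_set_special() in confluent_kafka.c currently rejects
 * callable partitioners with NOT_IMPLEMENTED and only the built-in
 * "random", "consistent" and "consistent_random" partitioner names are
 * honoured.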
*/ int32_t Producer_partitioner_cb (const rd_kafka_topic_t *rkt, const void *keydata, size_t keylen, int32_t partition_cnt, void *rkt_opaque, void *msg_opaque) { Handle *self = rkt_opaque; struct Producer_msgstate *msgstate = msg_opaque; PyGILState_STATE gstate; PyObject *result; PyObject *args; int32_t r = RD_KAFKA_PARTITION_UA; if (!msgstate) { /* Fall back on default C partitioner if neither a per-msg * partitioner nor a default Python partitioner is available */ return self->u.Producer.c_partitioner_cb(rkt, keydata, keylen, partition_cnt, rkt_opaque, msg_opaque); } gstate = PyGILState_Ensure(); if (!msgstate->partitioner_cb) { /* Fall back on default C partitioner if neither a per-msg * partitioner nor a default Python partitioner is available */ r = msgstate->self->u.Producer.c_partitioner_cb(rkt, keydata, keylen, partition_cnt, rkt_opaque, msg_opaque); goto done; } args = Py_BuildValue("(s#l)", (const char *)keydata, (int)keylen, (long)partition_cnt); if (!args) { cfl_PyErr_Format(RD_KAFKA_RESP_ERR__FAIL, "Unable to build callback args"); goto done; } result = PyObject_CallObject(msgstate->partitioner_cb, args); Py_DECREF(args); if (result) { r = (int32_t)PyLong_AsLong(result); if (PyErr_Occurred()) printf("FIXME: partition_cb returned wrong type " "(expected long), how to propagate?\n"); Py_DECREF(result); } else { printf("FIXME: partitioner_cb crashed, how to propagate?\n"); } done: PyGILState_Release(gstate); return r; } #if HAVE_PRODUCEV static rd_kafka_resp_err_t Producer_producev (Handle *self, const char *topic, int32_t partition, const void *value, size_t value_len, const void *key, size_t key_len, void *opaque, int64_t timestamp) { return rd_kafka_producev(self->rk, RD_KAFKA_V_MSGFLAGS(RD_KAFKA_MSG_F_COPY), RD_KAFKA_V_TOPIC(topic), RD_KAFKA_V_PARTITION(partition), RD_KAFKA_V_KEY(key, (size_t)key_len), RD_KAFKA_V_VALUE((void *)value, (size_t)value_len), RD_KAFKA_V_TIMESTAMP(timestamp), RD_KAFKA_V_OPAQUE(opaque), RD_KAFKA_V_END); } #else static rd_kafka_resp_err_t Producer_produce0 (Handle *self, const char *topic, int32_t partition, const void *value, size_t value_len, const void *key, size_t key_len, void *opaque) { rd_kafka_topic_t *rkt; rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; if (!(rkt = rd_kafka_topic_new(self->rk, topic, NULL))) return RD_KAFKA_RESP_ERR__INVALID_ARG; if (rd_kafka_produce(rkt, partition, RD_KAFKA_MSG_F_COPY, (void *)value, value_len, (void *)key, key_len, opaque) == -1) err = rd_kafka_last_error(); rd_kafka_topic_destroy(rkt); return err; } #endif static PyObject *Producer_produce (Handle *self, PyObject *args, PyObject *kwargs) { const char *topic, *value = NULL, *key = NULL; int value_len = 0, key_len = 0; int partition = RD_KAFKA_PARTITION_UA; PyObject *dr_cb = NULL, *dr_cb2 = NULL, *partitioner_cb = NULL; long long timestamp = 0; rd_kafka_resp_err_t err; struct Producer_msgstate *msgstate; static char *kws[] = { "topic", "value", "key", "partition", "callback", "on_delivery", /* Alias */ "partitioner", "timestamp", NULL }; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|z#z#iOOOL" , kws, &topic, &value, &value_len, &key, &key_len, &partition, &dr_cb, &dr_cb2, &partitioner_cb, ×tamp)) return NULL; #if !HAVE_PRODUCEV if (timestamp) { PyErr_Format(PyExc_NotImplementedError, "Producer timestamps require " "confluent-kafka-python built for librdkafka " "version >=v0.9.4 (librdkafka runtime 0x%x, " "buildtime 0x%x)", rd_kafka_version(), RD_KAFKA_VERSION); return NULL; } #endif if (dr_cb2 && !dr_cb) /* Alias */ dr_cb = dr_cb2; if (!dr_cb || 
dr_cb == Py_None) dr_cb = self->u.Producer.default_dr_cb; if (!partitioner_cb || partitioner_cb == Py_None) partitioner_cb = self->u.Producer.partitioner_cb; /* Create msgstate if necessary, may return NULL if no callbacks * are wanted. */ msgstate = Producer_msgstate_new(self, dr_cb, partitioner_cb); /* Produce message */ #if HAVE_PRODUCEV err = Producer_producev(self, topic, partition, value, value_len, key, key_len, msgstate, timestamp); #else err = Producer_produce0(self, topic, partition, value, value_len, key, key_len, msgstate); #endif if (err) { if (msgstate) Producer_msgstate_destroy(msgstate); if (err == RD_KAFKA_RESP_ERR__QUEUE_FULL) PyErr_Format(PyExc_BufferError, "%s", rd_kafka_err2str(err)); else cfl_PyErr_Format(err, "Unable to produce message: %s", rd_kafka_err2str(err)); return NULL; } Py_RETURN_NONE; } /** * @brief Call rd_kafka_poll() and keep track of crashing callbacks. * @returns -1 if callback crashed (or poll() failed), else the number * of events served. */ static int Producer_poll0 (Handle *self, int tmout) { int r; CallState cs; CallState_begin(self, &cs); r = rd_kafka_poll(self->rk, tmout); if (!CallState_end(self, &cs)) { return -1; } return r; } static PyObject *Producer_poll (Handle *self, PyObject *args, PyObject *kwargs) { double tmout; int r; static char *kws[] = { "timeout", NULL }; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "d", kws, &tmout)) return NULL; r = Producer_poll0(self, (int)(tmout * 1000)); if (r == -1) return NULL; return PyLong_FromLong(r); } static PyObject *Producer_flush (Handle *self, PyObject *args, PyObject *kwargs) { double tmout = -1; int qlen; static char *kws[] = { "timeout", NULL }; #if RD_KAFKA_VERSION >= 0x00090300 CallState cs; #endif if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|d", kws, &tmout)) return NULL; #if RD_KAFKA_VERSION >= 0x00090300 CallState_begin(self, &cs); rd_kafka_flush(self->rk, tmout < 0 ? -1 : (int)(tmout * 1000)); if (!CallState_end(self, &cs)) return NULL; qlen = rd_kafka_outq_len(self->rk); #else while ((qlen = rd_kafka_outq_len(self->rk)) > 0) { if (Producer_poll0(self, 500) == -1) return NULL; } #endif return PyLong_FromLong(qlen); } static PyMethodDef Producer_methods[] = { { "produce", (PyCFunction)Producer_produce, METH_VARARGS|METH_KEYWORDS, ".. py:function:: produce(topic, [value], [key], [partition], [on_delivery], [timestamp])\n" "\n" " Produce message to topic.\n" " This is an asynchronous operation, an application may use the " "``callback`` (alias ``on_delivery``) argument to pass a function " "(or lambda) that will be called from :py:func:`poll()` when the " "message has been successfully delivered or permanently fails delivery.\n" "\n" " :param str topic: Topic to produce message to\n" " :param str|bytes value: Message payload\n" " :param str|bytes key: Message key\n" " :param int partition: Partition to produce to, elses uses the " "configured partitioner.\n" " :param func on_delivery(err,msg): Delivery report callback to call " "(from :py:func:`poll()` or :py:func:`flush()`) on successful or " "failed delivery\n" " :param int timestamp: Message timestamp (CreateTime) in microseconds since epoch UTC (requires librdkafka >= v0.9.4, api.version.request=true, and broker >= 0.10.0.0). 
Default value is current time.\n" "\n" " :rtype: None\n" " :raises BufferError: if the internal producer message queue is " "full (``queue.buffering.max.messages`` exceeded)\n" " :raises KafkaException: for other errors, see exception code\n" " :raises NotImplementedError: if timestamp is specified without underlying library support.\n" "\n" }, { "poll", (PyCFunction)Producer_poll, METH_VARARGS|METH_KEYWORDS, ".. py:function:: poll([timeout])\n" "\n" " Polls the producer for events and calls the corresponding " "callbacks (if registered).\n" "\n" " Callbacks:\n" "\n" " - ``on_delivery`` callbacks from :py:func:`produce()`\n" " - ...\n" "\n" " :param float timeout: Maximum time to block waiting for events.\n" " :returns: Number of events processed (callbacks served)\n" " :rtype: int\n" "\n" }, { "flush", (PyCFunction)Producer_flush, METH_VARARGS|METH_KEYWORDS, ".. py:function:: flush([timeout])\n" "\n" " Wait for all messages in the Producer queue to be delivered.\n" " This is a convenience method that calls :py:func:`poll()` until " ":py:func:`len()` is zero or the optional timeout elapses.\n" "\n" " :param: float timeout: Maximum time to block (requires librdkafka >= v0.9.4).\n" " :returns: Number of messages still in queue.\n" "\n" ".. note:: See :py:func:`poll()` for a description on what " "callbacks may be triggered.\n" "\n" }, { NULL } }; static Py_ssize_t Producer__len__ (Handle *self) { return rd_kafka_outq_len(self->rk); } static PySequenceMethods Producer_seq_methods = { (lenfunc)Producer__len__ /* sq_length */ }; static int Producer_init (PyObject *selfobj, PyObject *args, PyObject *kwargs) { Handle *self = (Handle *)selfobj; char errstr[256]; rd_kafka_conf_t *conf; if (self->rk) { PyErr_SetString(PyExc_RuntimeError, "Producer already __init__:ialized"); return -1; } self->type = RD_KAFKA_PRODUCER; if (!(conf = common_conf_setup(RD_KAFKA_PRODUCER, self, args, kwargs))) return -1; rd_kafka_conf_set_dr_msg_cb(conf, dr_msg_cb); self->rk = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr)); if (!self->rk) { cfl_PyErr_Format(rd_kafka_last_error(), "Failed to create producer: %s", errstr); rd_kafka_conf_destroy(conf); return -1; } return 0; } static PyObject *Producer_new (PyTypeObject *type, PyObject *args, PyObject *kwargs) { return type->tp_alloc(type, 0); } PyTypeObject ProducerType = { PyVarObject_HEAD_INIT(NULL, 0) "cimpl.Producer", /*tp_name*/ sizeof(Handle), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Producer_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ &Producer_seq_methods, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ "Asynchronous Kafka Producer\n" "\n" ".. py:function:: Producer(**kwargs)\n" "\n" " Create new Producer instance using provided configuration dict.\n" "\n" "\n" ".. 
py:function:: len()\n" "\n" " :returns: Number of messages and Kafka protocol requests waiting to be delivered to broker.\n" " :rtype: int\n" "\n", /*tp_doc*/ (traverseproc)Producer_traverse, /* tp_traverse */ (inquiry)Producer_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ Producer_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ Producer_init, /* tp_init */ 0, /* tp_alloc */ Producer_new /* tp_new */ }; confluent-kafka-0.11.0/confluent_kafka.egg-info/0000755000076600000240000000000013135737034022111 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/confluent_kafka.egg-info/dependency_links.txt0000644000076600000240000000000113135737034026157 0ustar magnusstaff00000000000000 confluent-kafka-0.11.0/confluent_kafka.egg-info/PKG-INFO0000644000076600000240000000044313135737034023207 0ustar magnusstaff00000000000000Metadata-Version: 1.0 Name: confluent-kafka Version: 0.11.0 Summary: Confluent's Apache Kafka client for Python Home-page: https://github.com/confluentinc/confluent-kafka-python Author: Confluent Inc Author-email: support@confluent.io License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN confluent-kafka-0.11.0/confluent_kafka.egg-info/requires.txt0000644000076600000240000000003713135737034024511 0ustar magnusstaff00000000000000 [avro] fastavro requests avro confluent-kafka-0.11.0/confluent_kafka.egg-info/SOURCES.txt0000644000076600000240000000211413135737034023773 0ustar magnusstaff00000000000000LICENSE MANIFEST.in README.md setup.py confluent_kafka/__init__.py confluent_kafka.egg-info/PKG-INFO confluent_kafka.egg-info/SOURCES.txt confluent_kafka.egg-info/dependency_links.txt confluent_kafka.egg-info/requires.txt confluent_kafka.egg-info/top_level.txt confluent_kafka/avro/__init__.py confluent_kafka/avro/cached_schema_registry_client.py confluent_kafka/avro/error.py confluent_kafka/avro/load.py confluent_kafka/avro/serializer/__init__.py confluent_kafka/avro/serializer/message_serializer.py confluent_kafka/kafkatest/__init__.py confluent_kafka/kafkatest/verifiable_client.py confluent_kafka/kafkatest/verifiable_consumer.py confluent_kafka/kafkatest/verifiable_producer.py confluent_kafka/src/Consumer.c confluent_kafka/src/Producer.c confluent_kafka/src/confluent_kafka.c confluent_kafka/src/confluent_kafka.h tests/avro/__init__.py tests/avro/data_gen.py tests/avro/mock_registry.py tests/avro/mock_schema_registry_client.py tests/avro/test_avro_producer.py tests/avro/test_cached_client.py tests/avro/test_message_serializer.py tests/avro/test_mock_client.py tests/avro/test_util.pyconfluent-kafka-0.11.0/confluent_kafka.egg-info/top_level.txt0000644000076600000240000000002613135737034024641 0ustar magnusstaff00000000000000confluent_kafka tests confluent-kafka-0.11.0/LICENSE0000644000076600000240000002607513042650774016306 0ustar magnusstaff00000000000000Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
confluent-kafka-0.11.0/MANIFEST.in0000644000076600000240000000010513042650774017021 0ustar magnusstaff00000000000000include README.md include LICENSE include confluent_kafka/src/*.[ch] confluent-kafka-0.11.0/PKG-INFO0000644000076600000240000000044313135737034016363 0ustar magnusstaff00000000000000Metadata-Version: 1.0 Name: confluent-kafka Version: 0.11.0 Summary: Confluent's Apache Kafka client for Python Home-page: https://github.com/confluentinc/confluent-kafka-python Author: Confluent Inc Author-email: support@confluent.io License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN confluent-kafka-0.11.0/README.md0000644000076600000240000001504313107254202016535 0ustar magnusstaff00000000000000Confluent's Python Client for Apache Kafka™ ======================================================= **confluent-kafka-python** is Confluent's Python client for [Apache Kafka](http://kafka.apache.org/) and the [Confluent Platform](https://www.confluent.io/product/compare/). Features: - **High performance** - confluent-kafka-python is a lightweight wrapper around [librdkafka](https://github.com/edenhill/librdkafka), a finely tuned C client. - **Reliability** - There are a lot of details to get right when writing an Apache Kafka client. We get them right in one place (librdkafka) and leverage this work across all of our clients (also [confluent-kafka-go](https://github.com/confluentinc/confluent-kafka-go) and [confluent-kafka-dotnet](https://github.com/confluentinc/confluent-kafka-dotnet)). - **Supported** - Commercial support is offered by [Confluent](https://confluent.io/). - **Future proof** - Confluent, founded by the creators of Kafka, is building a [streaming platform](https://www.confluent.io/product/compare/) with Apache Kafka at its core. It's a high priority for us that client features keep pace with core Apache Kafka and components of the [Confluent Platform](https://www.confluent.io/product/compare/). The Python bindings provide a high-level Producer and Consumer with support for the balanced consumer groups of Apache Kafka 0.9. See the [API documentation](http://docs.confluent.io/current/clients/confluent-kafka-python/index.html) for more info. 
**License**: [Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0) Usage ===== **Producer:** ```python from confluent_kafka import Producer p = Producer({'bootstrap.servers': 'mybroker,mybroker2'}) for data in some_data_source: p.produce('mytopic', data.encode('utf-8')) p.flush() ``` **High-level Consumer:** ```python from confluent_kafka import Consumer, KafkaError c = Consumer({'bootstrap.servers': 'mybroker', 'group.id': 'mygroup', 'default.topic.config': {'auto.offset.reset': 'smallest'}}) c.subscribe(['mytopic']) running = True while running: msg = c.poll() if not msg.error(): print('Received message: %s' % msg.value().decode('utf-8')) elif msg.error().code() != KafkaError._PARTITION_EOF: print(msg.error()) running = False c.close() ``` **AvroProducer** ```python from confluent_kafka import avro from confluent_kafka.avro import AvroProducer value_schema = avro.load('ValueSchema.avsc') key_schema = avro.load('KeySchema.avsc') value = {"name": "Value"} key = {"name": "Key"} avroProducer = AvroProducer({'bootstrap.servers': 'mybroker,mybroker2', 'schema.registry.url': 'http://schema_registry_host:port'}, default_key_schema=key_schema, default_value_schema=value_schema) avroProducer.produce(topic='my_topic', value=value, key=key) ``` **AvroConsumer** ```python from confluent_kafka import KafkaError from confluent_kafka.avro import AvroConsumer from confluent_kafka.avro.serializer import SerializerError c = AvroConsumer({'bootstrap.servers': 'mybroker,mybroker2', 'group.id': 'groupid', 'schema.registry.url': 'http://127.0.0.1:8081'}) c.subscribe(['my_topic']) running = True while running: try: msg = c.poll(10) if msg: if not msg.error(): print(msg.value()) elif msg.error().code() != KafkaError._PARTITION_EOF: print(msg.error()) running = False except SerializerError as e: print("Message deserialization failed for %s: %s" % (msg, e)) running = False c.close() ``` See [examples](examples) for more examples. Broker Compatibility ==================== The Python client (as well as the underlying C library librdkafka) supports all broker versions >= 0.8. However, because of the nature of the Kafka protocol in broker versions 0.8 and 0.9, a client cannot safely detect which protocol version the broker actually supports, so you will need to hint the Python client which protocol version it may use. This is done through two configuration settings: * `broker.version.fallback=YOUR_BROKER_VERSION` (default 0.9.0.1) * `api.version.request=true|false` (default false) When using a Kafka 0.10 broker or later, you only need to set `api.version.request=true`. If you use Kafka broker 0.9 or 0.8 you should leave `api.version.request=false` (default) and set `broker.version.fallback` to your broker version, e.g. `broker.version.fallback=0.9.0.1`. 
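For example, a minimal consumer configuration for each case might look like this (broker addresses and the broker version are placeholders; the same settings apply to a `Producer`):

```python
from confluent_kafka import Consumer

# Kafka broker >= 0.10: let the client query the broker for its protocol version
c = Consumer({'bootstrap.servers': 'mybroker',
              'group.id': 'mygroup',
              'api.version.request': 'true'})

# Kafka broker 0.9 or 0.8: keep api.version.request=false and pin the fallback version
c_legacy = Consumer({'bootstrap.servers': 'mybroker',
                     'group.id': 'mygroup',
                     'api.version.request': 'false',
                     'broker.version.fallback': '0.9.0.1'})
```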
More info here: https://github.com/edenhill/librdkafka/wiki/Broker-version-compatibility Prerequisites ============= * Python >= 2.7 or Python 3.x * [librdkafka](https://github.com/edenhill/librdkafka) >= 0.9.1 For **Debian/Ubuntu**-based systems, add this APT repo and then do `sudo apt-get install librdkafka-dev python-dev`: http://docs.confluent.io/current/installation.html#installation-apt For **RedHat** and **RPM**-based distros, add this YUM repo and then do `sudo yum install librdkafka-devel python-devel`: http://docs.confluent.io/current/installation.html#rpm-packages-via-yum On **OSX**, use **homebrew** and do `brew install librdkafka` Install ======= **Install from PyPI:** $ pip install confluent-kafka # for AvroProducer or AvroConsumer $ pip install confluent-kafka[avro] **Install from source / tarball:** $ pip install . # for AvroProducer or AvroConsumer $ pip install .[avro] Build ===== $ python setup.py build If librdkafka is installed in a non-standard location, provide the include and library directories with: $ C_INCLUDE_PATH=/path/to/include LIBRARY_PATH=/path/to/lib python setup.py ... Tests ===== **Run unit-tests:** In order to run the full test suite, simply execute: $ tox -r **NOTE**: Requires `tox` (please install with `pip install tox`), several supported versions of Python on your path, and `librdkafka` [installed](tools/bootstrap-librdkafka.sh) into `tmp-build`. **Run integration tests:** To run the integration tests, uncomment the following line from `tox.ini` and add the paths to your Kafka and Confluent Schema Registry instances. You can also run the integration tests outside of `tox` by running this command from the source root: examples/integration_test.py [<kafka-broker>] [<schema-registry>] **WARNING**: These tests require an active Kafka cluster and will create new topics. Generate Documentation ====================== Install the sphinx and sphinx_rtd_theme packages: $ pip install sphinx sphinx_rtd_theme Build HTML docs: $ make docs or: $ python setup.py build_sphinx Documentation will be generated in `docs/_build/`. 
confluent-kafka-0.11.0/setup.cfg0000644000076600000240000000004613135737034017106 0ustar magnusstaff00000000000000[egg_info] tag_build = tag_date = 0 confluent-kafka-0.11.0/setup.py0000755000076600000240000000164413122216424016776 0ustar magnusstaff00000000000000#!/usr/bin/env python from setuptools import setup, find_packages from distutils.core import Extension import sys if sys.version_info[0] < 3: avro = 'avro' else: avro = 'avro-python3' module = Extension('confluent_kafka.cimpl', libraries=['rdkafka'], sources=['confluent_kafka/src/confluent_kafka.c', 'confluent_kafka/src/Producer.c', 'confluent_kafka/src/Consumer.c']) setup(name='confluent-kafka', version='0.11.0', description='Confluent\'s Apache Kafka client for Python', author='Confluent Inc', author_email='support@confluent.io', url='https://github.com/confluentinc/confluent-kafka-python', ext_modules=[module], packages=find_packages(exclude=("tests",)), data_files=[('', ['LICENSE'])], extras_require={ 'avro': ['fastavro', 'requests', avro] }) confluent-kafka-0.11.0/tests/0000755000076600000240000000000013135737034016427 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/tests/avro/0000755000076600000240000000000013135737034017376 5ustar magnusstaff00000000000000confluent-kafka-0.11.0/tests/avro/__init__.py0000644000076600000240000000000013042650774021477 0ustar magnusstaff00000000000000confluent-kafka-0.11.0/tests/avro/data_gen.py0000644000076600000240000000511513107245476021517 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import os import os.path import random from avro import schema from avro.datafile import DataFileWriter from avro.io import DatumWriter NAMES = ['stefan', 'melanie', 'nick', 'darrel', 'kent', 'simon'] AGES = list(range(1, 10)) + [None] def get_schema_path(fname): dname = os.path.dirname(os.path.realpath(__file__)) return os.path.join(dname, fname) def load_schema_file(fname): fname = get_schema_path(fname) with open(fname) as f: return f.read() avsc_dir = os.path.dirname(os.path.realpath(__file__)) BASIC_SCHEMA = load_schema_file(os.path.join(avsc_dir, 'basic_schema.avsc')) def create_basic_item(i): return { 'name': random.choice(NAMES) + '-' + str(i), 'number': random.choice(AGES) } BASIC_ITEMS = map(create_basic_item, range(1, 20)) ADVANCED_SCHEMA = load_schema_file(os.path.join(avsc_dir, 'adv_schema.avsc')) def create_adv_item(i): friends = map(create_basic_item, range(1, 3)) family = map(create_basic_item, range(1, 3)) basic = create_basic_item(i) basic['family'] = dict(map(lambda bi: (bi['name'], bi), family)) basic['friends'] = dict(map(lambda bi: (bi['name'], bi), friends)) return basic ADVANCED_ITEMS = map(create_adv_item, range(1, 20)) def _write_items(base_name, schema_str, items): avro_schema = schema.Parse(schema_str) avro_file = base_name + '.avro' with DataFileWriter(open(avro_file, "w"), DatumWriter(), avro_schema) as writer: for i in items: writer.append(i) writer.close return (avro_file) def write_basic_items(base_name): return _write_items(base_name, BASIC_SCHEMA, BASIC_ITEMS) def write_advanced_items(base_name): return _write_items(base_name, ADVANCED_SCHEMA, ADVANCED_ITEMS) def cleanup(files): for f in files: try: os.remove(f) except OSError: pass if __name__ == "__main__": write_advanced_items("advanced") confluent-kafka-0.11.0/tests/avro/mock_registry.py0000644000076600000240000001336213107245476022641 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import sys import json import re from threading import Thread from tests.avro.mock_schema_registry_client import MockSchemaRegistryClient from confluent_kafka import avro if sys.version_info[0] < 3: import BaseHTTPServer as HTTPSERVER else: import http.server as HTTPSERVER class ReqHandler(HTTPSERVER.BaseHTTPRequestHandler): protocol_version = "HTTP/1.0" def do_GET(self): self.server._run_routes(self) def do_POST(self): self.server._run_routes(self) def log_message(self, format, *args): pass class MockServer(HTTPSERVER.HTTPServer, object): def __init__(self, *args, **kwargs): super(MockServer, self).__init__(*args, **kwargs) self.counts = {} self.registry = MockSchemaRegistryClient() self.schema_cache = {} self.all_routes = { 'GET': [ (r"/schemas/ids/(\d+)", 'get_schema_by_id'), (r"/subjects/(\w+)/versions/latest", 'get_latest') ], 'POST': [ (r"/subjects/(\w+)/versions", 'register'), (r"/subjects/(\w+)", 'get_version') ] } def _send_response(self, resp, status, body): resp.send_response(status) resp.send_header("Content-Type", "application/json") resp.end_headers() resp.wfile.write(json.dumps(body).encode()) resp.finish() def _create_error(self, msg, status=400, err_code=1): return (status, { "error_code": err_code, "message": msg }) def _run_routes(self, req): self.add_count((req.command, req.path)) routes = self.all_routes.get(req.command, []) for r in routes: m = re.match(r[0], req.path) if m: func = getattr(self, r[1]) status, body = func(req, m.groups()) return self._send_response(req, status, body) # here means we got a bad req status, body = self._create_error("bad path specified") self._send_response(req, status, body) def get_schema_by_id(self, req, groups): schema_id = int(groups[0]) schema = self.registry.get_by_id(schema_id) if not schema: return self._create_error("schema not found", 404) result = { "schema": json.dumps(schema.to_json()) } return (200, result) def _get_identity_schema(self, avro_schema): # normalized schema_str = json.dumps(avro_schema.to_json()) if schema_str in self.schema_cache: return self.schema_cache[schema_str] self.schema_cache[schema_str] = avro_schema return avro_schema def _get_schema_from_body(self, req): length = int(req.headers['content-length']) data = req.rfile.read(length) data = json.loads(data.decode("utf-8")) schema = data.get("schema", None) if not schema: return None try: avro_schema = avro.loads(schema) return self._get_identity_schema(avro_schema) except: return None def register(self, req, groups): avro_schema = self._get_schema_from_body(req) if not avro_schema: return self._create_error("Invalid avro schema", 422, 42201) subject = groups[0] schema_id = self.registry.register(subject, avro_schema) return (200, {'id': schema_id}) def get_version(self, req, groups): avro_schema = self._get_schema_from_body(req) if not avro_schema: return self._create_error("Invalid avro schema", 422, 42201) subject = groups[0] version = self.registry.get_version(subject, avro_schema) if version == -1: return self._create_error("Not found", 404) schema_id = self.registry.get_id_for_schema(subject, avro_schema) result = { "schema": json.dumps(avro_schema.to_json()), "subject": subject, "id": schema_id, "version": version } return (200, result) def get_latest(self, req, groups): subject = groups[0] schema_id, avro_schema, version = self.registry.get_latest_schema(subject) if schema_id is None: return self._create_error("Not found", 404) result = { "schema": 
json.dumps(avro_schema.to_json()), "subject": subject, "id": schema_id, "version": version } return (200, result) def add_count(self, path): if path not in self.counts: self.counts[path] = 0 self.counts[path] += 1 class ServerThread(Thread): def __init__(self, port): Thread.__init__(self) self.server = None self.port = port self.daemon = True def run(self): self.server = MockServer(('127.0.0.1', self.port), ReqHandler) self.server.serve_forever() def shutdown(self): if self.server: self.server.shutdown() self.server.socket.close() if __name__ == '__main__': s = ServerThread(0) s.start() confluent-kafka-0.11.0/tests/avro/mock_schema_registry_client.py0000644000076600000240000001161113042650774025511 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # from confluent_kafka.avro import ClientError class MockSchemaRegistryClient(object): """ A client that acts as a schema registry locally. Compatibiity related methods are not implemented at this time. """ def __init__(self, max_schemas_per_subject=1000): self.max_schemas_per_subject = max_schemas_per_subject # subj => { schema => id } self.subject_to_schema_ids = {} # id => avro_schema self.id_to_schema = {} # subj => { schema => version } self.subject_to_schema_versions = {} self.subject_to_latest_schema = {} # counters self.next_id = 1 self.schema_to_id = {} def _get_next_id(self, schema): if schema in self.schema_to_id: return self.schema_to_id[schema] result = self.next_id self.next_id += 1 self.schema_to_id[schema] = result return result def _get_next_version(self, subject): if subject not in self.subject_to_schema_versions: self.subject_to_schema_versions[subject] = {} return len(self.subject_to_schema_versions[subject]) def _get_all_versions(self, subject): versions = self.subject_to_schema_versions.get(subject, {}) return sorted(versions) def _add_to_cache(self, cache, subject, schema, value): if subject not in cache: cache[subject] = {} sub_cache = cache[subject] sub_cache[schema] = value def _cache_schema(self, schema, schema_id, subject, version): # don't overwrite anything if schema_id in self.id_to_schema: schema = self.id_to_schema[schema_id] else: self.id_to_schema[schema_id] = schema self._add_to_cache(self.subject_to_schema_ids, subject, schema, schema_id) self._add_to_cache(self.subject_to_schema_versions, subject, schema, version) if subject in self.subject_to_latest_schema: si, s, v = self.subject_to_latest_schema[subject] if v > version: return self.subject_to_latest_schema[subject] = (schema_id, schema, version) def register(self, subject, avro_schema): """ Register a schema with the registry under the given subject and receive a schema id. avro_schema must be a parsed schema from the python avro library Multiple instances of the same schema will result in inconsistencies. 
""" schemas_to_id = self.subject_to_schema_ids.get(subject, {}) schema_id = schemas_to_id.get(avro_schema, -1) if schema_id != -1: return schema_id # add it version = self._get_next_version(subject) schema_id = self._get_next_id(avro_schema) # cache it self._cache_schema(avro_schema, schema_id, subject, version) return schema_id def get_by_id(self, schema_id): """Retrieve a parsed avro schema by id or None if not found""" return self.id_to_schema.get(schema_id, None) def get_latest_schema(self, subject): """ Return the latest 3-tuple of: (the schema id, the parsed avro schema, the schema version) for a particular subject. If the subject is not found, (None,None,None) is returned. """ return self.subject_to_latest_schema.get(subject, (None, None, None)) def get_version(self, subject, avro_schema): """ Get the version of a schema for a given subject. Returns -1 if not found. """ schemas_to_version = self.subject_to_schema_versions.get(subject, {}) return schemas_to_version.get(avro_schema, -1) def get_id_for_schema(self, subject, avro_schema): """ Get the ID of a parsed schema """ schemas_to_id = self.subject_to_schema_ids.get(subject, {}) return schemas_to_id.get(avro_schema, -1) def test_compatibility(self, subject, avro_schema, version='latest'): raise ClientError("not implemented") def update_compatibility(self, level, subject=None): raise ClientError("not implemented") def get_compatibility(self, subject=None): raise ClientError("not implemented") confluent-kafka-0.11.0/tests/avro/test_avro_producer.py0000644000076600000240000001160113107245476023663 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import os from confluent_kafka import avro from requests.exceptions import ConnectionError import unittest from confluent_kafka.avro import AvroProducer from confluent_kafka.avro.serializer import (KeySerializerError, ValueSerializerError) from tests.avro.mock_schema_registry_client import MockSchemaRegistryClient avsc_dir = os.path.dirname(os.path.realpath(__file__)) class TestAvroProducer(unittest.TestCase): def test_instantiation(self): obj = AvroProducer({'schema.registry.url': 'http://127.0.0.1:0'}) self.assertTrue(isinstance(obj, AvroProducer)) self.assertNotEqual(obj, None) def test_produce_no_key(self): value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc")) producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}, default_value_schema=value_schema) with self.assertRaises(ConnectionError): # Unexistent schema-registry producer.produce(topic='test', value={"name": 'abc"'}) def test_produce_no_value(self): key_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc")) producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}, default_key_schema=key_schema) with self.assertRaises(ConnectionError): # Unexistent schema-registry producer.produce(topic='test', key={"name": 'abc"'}) def test_produce_no_value_schema(self): producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}) with self.assertRaises(ValueSerializerError): # Producer should not accept a value with no schema producer.produce(topic='test', value={"name": 'abc"'}) def test_produce_no_key_schema(self): producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}) with self.assertRaises(KeySerializerError): # If the key is provided as a dict an avro schema must also be provided producer.produce(topic='test', key={"name": 'abc"'}) def test_produce_value_and_key_schemas(self): value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc")) producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}, default_value_schema=value_schema, default_key_schema=value_schema) with self.assertRaises(ConnectionError): # Unexistent schema-registry producer.produce(topic='test', value={"name": 'abc"'}, key={"name": 'abc"'}) def test_produce_primitive_string_key(self): value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc")) key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc")) producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}) with self.assertRaises(ConnectionError): # Unexistent schema-registry producer.produce(topic='test', value={"name": 'abc"'}, value_schema=value_schema, key='mykey', key_schema=key_schema) def test_produce_primitive_key_and_value(self): value_schema = avro.load(os.path.join(avsc_dir, "primitive_float.avsc")) key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc")) producer = AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}) with self.assertRaises(ConnectionError): # Unexistent schema-registry producer.produce(topic='test', value=32., value_schema=value_schema, key='mykey', key_schema=key_schema) def test_produce_with_custom_registry(self): schema_registry = MockSchemaRegistryClient() value_schema = avro.load(os.path.join(avsc_dir, "basic_schema.avsc")) key_schema = avro.load(os.path.join(avsc_dir, "primitive_string.avsc")) producer = AvroProducer({}, schema_registry=schema_registry) producer.produce(topic='test', value={"name": 'abc"'}, 
value_schema=value_schema, key='mykey', key_schema=key_schema) def test_produce_with_custom_registry_and_registry_url(self): schema_registry = MockSchemaRegistryClient() with self.assertRaises(ValueError): AvroProducer({'schema.registry.url': 'http://127.0.0.1:9001'}, schema_registry=schema_registry) confluent-kafka-0.11.0/tests/avro/test_cached_client.py0000644000076600000240000001061013042650774023554 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import time import unittest from tests.avro import mock_registry from tests.avro import data_gen from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient from confluent_kafka import avro class TestCacheSchemaRegistryClient(unittest.TestCase): def setUp(self): self.server = mock_registry.ServerThread(0) self.server.start() time.sleep(1) self.client = CachedSchemaRegistryClient('http://127.0.0.1:' + str(self.server.server.server_port)) def tearDown(self): self.server.shutdown() self.server.join() def test_register(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) client = self.client schema_id = client.register('test', parsed) self.assertTrue(schema_id > 0) self.assertEqual(len(client.id_to_schema), 1) def test_multi_subject_register(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) client = self.client schema_id = client.register('test', parsed) self.assertTrue(schema_id > 0) # register again under different subject dupe_id = client.register('other', parsed) self.assertEqual(schema_id, dupe_id) self.assertEqual(len(client.id_to_schema), 1) def test_dupe_register(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) subject = 'test' client = self.client schema_id = client.register(subject, parsed) self.assertTrue(schema_id > 0) latest = client.get_latest_schema(subject) # register again under same subject dupe_id = client.register(subject, parsed) self.assertEqual(schema_id, dupe_id) dupe_latest = client.get_latest_schema(subject) self.assertEqual(latest, dupe_latest) def assertLatest(self, meta_tuple, sid, schema, version): self.assertNotEqual(sid, -1) self.assertNotEqual(version, -1) self.assertEqual(meta_tuple[0], sid) self.assertEqual(meta_tuple[1], schema) self.assertEqual(meta_tuple[2], version) def test_getters(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) client = self.client subject = 'test' version = client.get_version(subject, parsed) self.assertEqual(version, None) schema = client.get_by_id(1) self.assertEqual(schema, None) latest = client.get_latest_schema(subject) self.assertEqual(latest, (None, None, None)) # register schema_id = client.register(subject, parsed) latest = client.get_latest_schema(subject) version = client.get_version(subject, parsed) self.assertLatest(latest, schema_id, parsed, version) fetched = client.get_by_id(schema_id) self.assertEqual(fetched, parsed) def test_multi_register(self): basic = avro.loads(data_gen.BASIC_SCHEMA) 
adv = avro.loads(data_gen.ADVANCED_SCHEMA) subject = 'test' client = self.client id1 = client.register(subject, basic) latest1 = client.get_latest_schema(subject) v1 = client.get_version(subject, basic) self.assertLatest(latest1, id1, basic, v1) id2 = client.register(subject, adv) latest2 = client.get_latest_schema(subject) v2 = client.get_version(subject, adv) self.assertLatest(latest2, id2, adv, v2) self.assertNotEqual(id1, id2) self.assertNotEqual(latest1, latest2) # ensure version is higher self.assertTrue(latest1[2] < latest2[2]) client.register(subject, basic) latest3 = client.get_latest_schema(subject) # latest should not change with a re-reg self.assertEqual(latest2, latest3) def hash_func(self): return hash(str(self)) confluent-kafka-0.11.0/tests/avro/test_message_serializer.py0000644000076600000240000000564413107245476024700 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import struct import unittest from tests.avro import data_gen from confluent_kafka.avro.serializer.message_serializer import MessageSerializer from tests.avro.mock_schema_registry_client import MockSchemaRegistryClient from confluent_kafka import avro class TestMessageSerializer(unittest.TestCase): def setUp(self): # need to set up the serializer self.client = MockSchemaRegistryClient() self.ms = MessageSerializer(self.client) def assertMessageIsSame(self, message, expected, schema_id): self.assertTrue(message) self.assertTrue(len(message) > 5) magic, sid = struct.unpack('>bI', message[0:5]) self.assertEqual(magic, 0) self.assertEqual(sid, schema_id) decoded = self.ms.decode_message(message) self.assertTrue(decoded) self.assertEqual(decoded, expected) def test_encode_with_schema_id(self): adv = avro.loads(data_gen.ADVANCED_SCHEMA) basic = avro.loads(data_gen.BASIC_SCHEMA) subject = 'test' schema_id = self.client.register(subject, basic) records = data_gen.BASIC_ITEMS for record in records: message = self.ms.encode_record_with_schema_id(schema_id, record) self.assertMessageIsSame(message, record, schema_id) subject = 'test_adv' adv_schema_id = self.client.register(subject, adv) self.assertNotEqual(adv_schema_id, schema_id) records = data_gen.ADVANCED_ITEMS for record in records: message = self.ms.encode_record_with_schema_id(adv_schema_id, record) self.assertMessageIsSame(message, record, adv_schema_id) def test_encode_record_with_schema(self): topic = 'test' basic = avro.loads(data_gen.BASIC_SCHEMA) subject = 'test-value' schema_id = self.client.register(subject, basic) records = data_gen.BASIC_ITEMS for record in records: message = self.ms.encode_record_with_schema(topic, basic, record) self.assertMessageIsSame(message, record, schema_id) def test_decode_none(self): """"null/None messages should decode to None""" self.assertIsNone(self.ms.decode_message(None)) def hash_func(self): return hash(str(self)) 
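The serializer tests above exercise the wire framing that MessageSerializer emits: a zero "magic" byte, a 4-byte big-endian schema id, and then the Avro-encoded payload (the same '>bI' layout unpacked in assertMessageIsSame). A minimal sketch of peeling off that header is shown below; the helper name is illustrative and not part of the package.

```python
import struct


def split_confluent_header(message):
    # First 5 bytes: magic byte (expected to be 0) followed by a big-endian
    # 4-byte schema id, mirroring struct.unpack('>bI', message[0:5]) in the test.
    magic, schema_id = struct.unpack('>bI', message[0:5])
    if magic != 0:
        raise ValueError("unexpected magic byte: %r" % magic)
    # The remaining bytes are the Avro-encoded record.
    return schema_id, message[5:]
```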
confluent-kafka-0.11.0/tests/avro/test_mock_client.py0000644000076600000240000001012113042650774023273 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import unittest from tests.avro import data_gen from tests.avro.mock_schema_registry_client import MockSchemaRegistryClient from confluent_kafka import avro class TestMockSchemaRegistryClient(unittest.TestCase): def setUp(self): self.client = MockSchemaRegistryClient() def test_register(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) client = self.client schema_id = client.register('test', parsed) self.assertTrue(schema_id > 0) self.assertEqual(len(client.id_to_schema), 1) def test_multi_subject_register(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) client = self.client schema_id = client.register('test', parsed) self.assertTrue(schema_id > 0) # register again under different subject dupe_id = client.register('other', parsed) self.assertEqual(schema_id, dupe_id) self.assertEqual(len(client.id_to_schema), 1) def test_dupe_register(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) subject = 'test' client = self.client schema_id = client.register(subject, parsed) self.assertTrue(schema_id > 0) latest = client.get_latest_schema(subject) # register again under same subject dupe_id = client.register(subject, parsed) self.assertEqual(schema_id, dupe_id) dupe_latest = client.get_latest_schema(subject) self.assertEqual(latest, dupe_latest) def assertLatest(self, meta_tuple, sid, schema, version): self.assertNotEqual(sid, -1) self.assertNotEqual(version, -1) self.assertEqual(meta_tuple[0], sid) self.assertEqual(meta_tuple[1], schema) self.assertEqual(meta_tuple[2], version) def test_getters(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) client = self.client subject = 'test' version = client.get_version(subject, parsed) self.assertEqual(version, -1) schema = client.get_by_id(1) self.assertEqual(schema, None) latest = client.get_latest_schema(subject) self.assertEqual(latest, (None, None, None)) # register schema_id = client.register(subject, parsed) latest = client.get_latest_schema(subject) version = client.get_version(subject, parsed) self.assertLatest(latest, schema_id, parsed, version) fetched = client.get_by_id(schema_id) self.assertEqual(fetched, parsed) def test_multi_register(self): basic = avro.loads(data_gen.BASIC_SCHEMA) adv = avro.loads(data_gen.ADVANCED_SCHEMA) subject = 'test' client = self.client id1 = client.register(subject, basic) latest1 = client.get_latest_schema(subject) v1 = client.get_version(subject, basic) self.assertLatest(latest1, id1, basic, v1) id2 = client.register(subject, adv) latest2 = client.get_latest_schema(subject) v2 = client.get_version(subject, adv) self.assertLatest(latest2, id2, adv, v2) self.assertNotEqual(id1, id2) self.assertNotEqual(latest1, latest2) # ensure version is higher self.assertTrue(latest1[2] < latest2[2]) client.register(subject, 
basic) latest3 = client.get_latest_schema(subject) # latest should not change with a re-reg self.assertEqual(latest2, latest3) def hash_func(self): return hash(str(self)) confluent-kafka-0.11.0/tests/avro/test_util.py0000644000076600000240000000217313042650774021771 0ustar magnusstaff00000000000000#!/usr/bin/env python # # Copyright 2016 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # derived from https://github.com/verisign/python-confluent-schemaregistry.git # import unittest from avro import schema from tests.avro import data_gen from confluent_kafka import avro class TestUtil(unittest.TestCase): def test_schema_from_string(self): parsed = avro.loads(data_gen.BASIC_SCHEMA) self.assertTrue(isinstance(parsed, schema.Schema)) def test_schema_from_file(self): parsed = avro.load(data_gen.get_schema_path('adv_schema.avsc')) self.assertTrue(isinstance(parsed, schema.Schema))