pax_global_header00006660000000000000000000000064116202705060014511gustar00rootroot0000000000000052 comment=c3d757347230b856c83c4e40759f842b4b22de38 ckanclient_0.9/000077500000000000000000000000001162027050600135565ustar00rootroot00000000000000ckanclient_0.9/.hgignore000066400000000000000000000000601162027050600153550ustar00rootroot00000000000000syntax: glob *.pyc ckanclient.egg-info/* dist/*ckanclient_0.9/README.txt000066400000000000000000000011361162027050600152550ustar00rootroot00000000000000ckanclient is a Python module to read and write to a CKAN server via the API. Usage ===== To see how to use the ckanclient, see the docs in __init__.py Retrieval ========= You can download releases of ckanclient from PyPI:: or you can get the latest repository using Mercurial:: hg clone https://knowledgeforge.net/ckan/ckanclient Tests ===== The ckanclient tests require the ckan and nose modules installed. Optionally ckanext-dgu can be installed too and the form api will be tested. To run the tests:: nosetests --ckan ckanclient/tests ckanclient_0.9/ckanclient/000077500000000000000000000000001162027050600156715ustar00rootroot00000000000000ckanclient_0.9/ckanclient/__init__.py000066400000000000000000000512621162027050600200100ustar00rootroot00000000000000__version__ = '0.9' __description__ = 'The CKAN client Python package.' __long_description__ = \ '''The CKAN client software may be used to make requests on the Comprehensive Knowledge Archive Network (CKAN) API including its REST interface to all primary objects (packages, groups, tags) and its search interface. Synopsis ======== The simplest way to make CKAN requests is: import ckanclient # Instantiate the CKAN client. ckan = ckanclient.CkanClient(api_key=my_key) # Get the package list. package_list = ckan.package_register_get() print package_list # Get the tag list. tag_list = ckan.tag_register_get() print tag_list # Collect the package metadata. 
package_entity = { 'name': my_package_name, 'url': my_package_url, 'download_url': my_package_download_url, 'tags': my_package_keywords, 'notes': my_package_long_description, } # Register the package. ckan.package_register_post(package_entity) # Get the details of a package. ckan.package_entity_get(package_name) package_entity = ckan.last_message print package_entity # Update the details of a package. ckan.package_entity_get(package_name) package_entity = ckan.last_message package_entity['url'] = new_package_url package_entity['notes'] = new_package_notes ckan.package_entity_put(package_entity) # List groups group_list = ckan.group_register_get() print group_list # Create a new group group_entity = { 'name': my_group_name, 'title': my_group_title, 'description': my_group_description, 'packages': group_package_names, } ckan.group_register_post(group_entity) # Get the details of a group. print ckan.group_entity_get(group_name) # Update the group details group_entity = ckan.last_message group_entity['title'] = new_group_title group_entity['packages'] = new_group_packages ckan.group_entity_put(group_entity) Changelog ========= v0.9 2011-08-09 --------------- * Default URL changed to thedatahub.org * Guard against 301 redirection, which loses POST contents v0.8 2011-07-20 --------------- * More detailed exceptions added * Some Python 3 compatibility v0.7 2011-01-27 --------------- * Package search returns results as a generator (rather than a list that needs to be paged) v0.5 2010-12-15 --------------- * Exception raised on error (more Pythonic) v0.4 2010-10-07 --------------- * Form API added * Package name editing * Groups added * Output can be verbose and use logger * Query API version * Sends API key via additional header v0.3 2010-04-28 --------------- * General usability improvements especially around error messages. 
* Package Relationships added * Package deletion fixed * Changeset entities added * Improved httpauth (thanks to will waites) v0.2 2009-11-05 --------------- * Search API support added * Improved package support to include additional fields such as 'extras' * Support tag and group entities in addition to package * Compatibility changes: CkanClient base_location (now should point to base api e.g. http://ckan.net/api rather than http://ckan.net/api/rest) v0.1 2008-04 ------------ * Fully functional implementation for REST interface to packages ''' __license__ = 'MIT' import os import re try: str = unicode from urllib2 import (urlopen, build_opener, install_opener, HTTPBasicAuthHandler, HTTPPasswordMgrWithDefaultRealm, Request, HTTPError, URLError) from urllib import urlencode except NameError: # Forward compatibility with Py3k from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import (build_opener, install_opener, urlopen, HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, Request) try: # since python 2.6 import json except ImportError: import simplejson as json import logging logger = logging.getLogger('ckanclient') PAGE_SIZE = 10 class CkanApiError(Exception): pass class CkanApiNotFoundError(CkanApiError): pass class CkanApiNotAuthorizedError(CkanApiError): pass class CkanApiConflictError(CkanApiError): pass class ApiRequest(Request): def __init__(self, url, data=None, headers={}, method=None): Request.__init__(self, url, data, headers) self._method = method def get_method(self): if self.has_data(): if not self._method: return 'POST' assert self._method in ('POST', 'PUT'), 'Invalid method "%s" for request with data.' % self._method return self._method else: if not self._method: return 'GET' assert self._method in ('GET', 'DELETE'), 'Invalid method "%s" for request without data.' 
% self._method return self._method class ApiClient(object): def reset(self): self.last_location = None self.last_status = None self.last_body = None self.last_headers = None self.last_message = None self.last_http_error = None self.last_url_error = None def open_url(self, location, data=None, headers={}, method=None): if self.is_verbose: self._print("ckanclient: Opening %s" % location) self.last_location = location try: if data != None: data = urlencode({data: 1}) req = ApiRequest(location, data, headers, method=method) self.url_response = urlopen(req) if data and self.url_response.geturl() != location: redirection = '%s -> %s' % (location, self.url_response.geturl()) raise URLError("Got redirected to another URL, which does not work with POSTS. Redirection: %s" % redirection) except HTTPError, inst: self._print("ckanclient: Received HTTP error code from CKAN resource.") self._print("ckanclient: location: %s" % location) self._print("ckanclient: response code: %s" % inst.fp.code) self._print("ckanclient: request headers: %s" % headers) self._print("ckanclient: request data: %s" % data) self._print("ckanclient: error: %s" % inst) self.last_http_error = inst self.last_status = inst.code self.last_message = inst.read() except URLError, inst: self._print("ckanclient: Unable to progress with URL.") self._print("ckanclient: location: %s" % location) self._print("ckanclient: request headers: %s" % headers) self._print("ckanclient: request data: %s" % data) self._print("ckanclient: error: %s" % inst) self.last_url_error = inst if isinstance(inst.reason, tuple): self.last_status,self.last_message = inst.reason else: self.last_message = inst.reason self.last_status = inst.errno else: self._print("ckanclient: OK opening CKAN resource: %s" % location) self.last_status = self.url_response.code self._print('ckanclient: last status %s' % self.last_status) self.last_body = self.url_response.read() self._print('ckanclient: last body %s' % self.last_body) self.last_headers = 
self.url_response.headers self._print('ckanclient: last headers %s' % self.last_headers) content_type = self.last_headers['Content-Type'] self._print('ckanclient: content type: %s' % content_type) is_json_response = False if 'json' in content_type: is_json_response = True if is_json_response: self.last_message = self._loadstr(self.last_body) else: self.last_message = self.last_body self._print('ckanclient: last message %s' % self.last_message) def get_location(self, resource_name, entity_id=None, subregister=None, entity2_id=None): base = self.base_location path = self.resource_paths[resource_name] if entity_id != None: path += '/' + entity_id if subregister != None: path += '/' + subregister if entity2_id != None: path += '/' + entity2_id return base + path def _dumpstr(self, data): return json.dumps(data) def _loadstr(self, string): try: if string == '': data = None else: data = json.loads(string) except ValueError, exception: msg = "Couldn't decode data from JSON string: '%s': %s" % (string, exception) raise ValueError, msg return data def _print(self, msg): '''Print depending on self.is_verbose and log at the same time.''' return logger.debug(msg) if self.is_verbose: print(msg) class CkanClient(ApiClient): """ Client API implementation for CKAN. 
:param base_location: default *http://thedatahub.org/api* :param api_key: default *None* :param is_verbose: default *False* :param http_user: default *None* :param http_pass: default *None* """ base_location = 'http://thedatahub.org/api' resource_paths = { 'Base': '', 'Changeset Register': '/rest/changeset', 'Changeset Entity': '/rest/changeset', 'Package Register': '/rest/package', 'Package Entity': '/rest/package', 'Tag Register': '/rest/tag', 'Tag Entity': '/rest/tag', 'Group Register': '/rest/group', 'Group Entity': '/rest/group', 'Package Search': '/search/package', 'Package Create Form': '/form/package/create', 'Package Edit Form': '/form/package/edit', } def __init__(self, base_location=None, api_key=None, is_verbose=False, http_user=None, http_pass=None): if base_location is not None: self.base_location = base_location self.api_key = api_key self.is_verbose = is_verbose if http_user and http_pass: password_mgr = HTTPPasswordMgrWithDefaultRealm() password_mgr.add_password(None, base_location, http_user, http_pass) handler = HTTPBasicAuthHandler(password_mgr) opener = build_opener(handler) install_opener(opener) def _auth_headers(self): return { 'Authorization': self.api_key, 'X-CKAN-API-Key': self.api_key } def open_url(self, url, *args, **kwargs): result = super(CkanClient, self).open_url(url, *args, **kwargs) if self.last_status not in (200, 201): if self.last_status == 404: raise CkanApiNotFoundError(self.last_status) elif self.last_status == 403: raise CkanApiNotAuthorizedError(self.last_status) elif self.last_status == 409: raise CkanApiConflictError(self.last_status) else: raise CkanApiError(self.last_message) return result def api_version_get(self): self.reset() url = self.get_location('Base') self.open_url(url) version = self.last_message['version'] return version # # Model API # def package_register_get(self): self.reset() url = self.get_location('Package Register') self.open_url(url) return self.last_message def package_register_post(self, 
package_dict): self.reset() url = self.get_location('Package Register') data = self._dumpstr(package_dict) headers = self._auth_headers() self.open_url(url, data, headers) return self.last_message def package_entity_get(self, package_name): self.reset() url = self.get_location('Package Entity', package_name) headers = self._auth_headers() self.open_url(url, headers=headers) return self.last_message def package_entity_put(self, package_dict, package_name=None): # You only need to specify the current package_name if you # are giving it a new package_name in the package_dict. self.reset() if not package_name: package_name = package_dict['name'] url = self.get_location('Package Entity', package_name) data = self._dumpstr(package_dict) headers = self._auth_headers() self.open_url(url, data, headers, method='PUT') return self.last_message def package_entity_delete(self, package_name): self.reset() url = self.get_location('Package Register', package_name) headers = self._auth_headers() self.open_url(url, headers=headers, method='DELETE') return self.last_message def package_relationship_register_get(self, package_name, relationship_type='relationships', relationship_with_package_name=None): self.reset() url = self.get_location('Package Entity', entity_id=package_name, subregister=relationship_type, entity2_id=relationship_with_package_name) headers = self._auth_headers() self.open_url(url, headers=headers) return self.last_message def package_relationship_entity_post(self, subject_package_name, relationship_type, object_package_name, comment=u''): self.reset() url = self.get_location('Package Entity', entity_id=subject_package_name, subregister=relationship_type, entity2_id=object_package_name) data = self._dumpstr({'comment':comment}) headers = self._auth_headers() self.open_url(url, data, headers, method='POST') return self.last_message def package_relationship_entity_put(self, subject_package_name, relationship_type, object_package_name, comment=u''): self.reset() url 
= self.get_location('Package Entity', entity_id=subject_package_name, subregister=relationship_type, entity2_id=object_package_name) data = self._dumpstr({'comment':comment}) headers = self._auth_headers() self.open_url(url, data, headers, method='PUT') return self.last_message def package_relationship_entity_delete(self, subject_package_name, relationship_type, object_package_name): self.reset() url = self.get_location('Package Entity', entity_id=subject_package_name, subregister=relationship_type, entity2_id=object_package_name) headers = self._auth_headers() self.open_url(url, headers=headers, method='DELETE') return self.last_message def tag_register_get(self): self.reset() url = self.get_location('Tag Register') self.open_url(url) return self.last_message def tag_entity_get(self, tag_name): self.reset() url = self.get_location('Tag Entity', tag_name) self.open_url(url) return self.last_message def group_register_post(self, group_dict): self.reset() url = self.get_location('Group Register') data = self._dumpstr(group_dict) headers = self._auth_headers() self.open_url(url, data, headers) return self.last_message def group_register_get(self): self.reset() url = self.get_location('Group Register') self.open_url(url) return self.last_message def group_entity_get(self, group_name): self.reset() url = self.get_location('Group Entity', group_name) self.open_url(url) return self.last_message def group_entity_put(self, group_dict, group_name=None): # You only need to specify the current group_name if you # are giving it a new group_name in the group_dict. 
self.reset() if not group_name: group_name = group_dict['name'] url = self.get_location('Group Entity', group_name) data = self._dumpstr(group_dict) headers = self._auth_headers() self.open_url(url, data, headers, method='PUT') return self.last_message # # Search API # def package_search(self, q, search_options=None): self.reset() search_options = search_options.copy() if search_options else {} url = self.get_location('Package Search') search_options['q'] = q if not search_options.get('limit'): search_options['limit'] = PAGE_SIZE data = self._dumpstr(search_options) headers = self._auth_headers() self.open_url(url, data, headers) result_dict = self.last_message if not search_options.get('offset'): result_dict['results'] = self._result_generator(result_dict['count'], result_dict['results'], self.package_search, q, search_options) return result_dict def _result_generator(self, count, results, func, q, search_options): '''Returns a generator that will make the necessary calls to page through results.''' page = 0 num_pages = int(count / search_options['limit'] + 0.9999) while True: for res in results: yield res # go to next page? 
page += 1 if page >= num_pages: break # retrieve next page search_options['offset'] = page * search_options['limit'] result_dict = func(q, search_options) results = result_dict['results'] # # Form API # def package_create_form_get(self): self.reset() url = self.get_location('Package Create Form') self.open_url(url) return self.last_message def package_create_form_post(self, form_submission): self.reset() url = self.get_location('Package Create Form') data = self._dumpstr(form_submission) headers = self._auth_headers() self.open_url(url, data, headers) return self.last_message def package_edit_form_get(self, package_ref): self.reset() url = self.get_location('Package Edit Form', package_ref) self.open_url(url) return self.last_message def package_edit_form_post(self, package_ref, form_submission): self.reset() url = self.get_location('Package Edit Form', package_ref) data = self._dumpstr(form_submission) headers = self._auth_headers() self.open_url(url, data, headers) return self.last_message # # Changeset API # def changeset_register_get(self): self.reset() url = self.get_location('Changeset Register') self.open_url(url) return self.last_message def changeset_entity_get(self, changeset_name): self.reset() url = self.get_location('Changeset Entity', changeset_name) self.open_url(url) return self.last_message # # data API # def _storage_metadata_url(self, path): url = self.base_location if not url.endswith("/"): url += "/" url += "storage/metadata" if not path.startswith("/"): url += "/" url += path return url def storage_metadata_get(self, path): url = self._storage_metadata_url(path) self.open_url(url) return self._loadstr(self.last_message) def storage_metadata_set(self, path, metadata): url = self._storage_metadata_url(path) payload = self._dumpstr(metadata) self.open_url(url, payload, method="PUT") return self._loadstr(self.last_message) def storage_metadata_update(self, path, metadata): url = self._storage_metadata_url(path) payload = self._dumpstr(metadata) 
self.open_url(url, payload, method="POST") return self._loadstr(self.last_message) def _storage_auth_url(self, path): url = self.base_location if not url.endswith("/"): url += "/" url += "storage/auth" if not path.startswith("/"): url += "/" url += path return url def storage_auth_get(self, path, headers): url = self._storage_auth_url(path) payload = self._dumpstr(headers) self.open_url(url, payload, method="POST") return self._loadstr(self.last_message) # # Utils # def is_id(self, id_string): '''Tells the client if the string looks like an id or not''' return bool(re.match('^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', id_string)) ckanclient_0.9/ckanclient/loaders/000077500000000000000000000000001162027050600173225ustar00rootroot00000000000000ckanclient_0.9/ckanclient/loaders/__init__.py000066400000000000000000000000001162027050600214210ustar00rootroot00000000000000ckanclient_0.9/ckanclient/loaders/base.py000077500000000000000000000434541162027050600206230ustar00rootroot00000000000000from optparse import OptionParser from gdata.spreadsheet.service import SpreadsheetsService as GoogleSpreadsheetsService from ckanclient import CkanClient, CkanApiError from time import sleep import string import pprint class GoogleSpreadsheetReader(object): """ Directs Google Spreadsheets service client to obtain spreadsheet cells. """ def __init__(self, options): """Init the Google Spreadsheets service client.""" self.options = options self.service = GoogleSpreadsheetsService() if not self.options.google_email: print "Warning: Google account email not provided." if not self.options.google_password: print "Warning: Google account password not provided." self.service.email = self.options.google_email self.service.password = self.options.google_password self.service.ProgrammaticLogin() if not self.options.google_spreadsheet_key: print "Warning: Google spreadsheet key not provided." 
def get_cells(self, sheet_index=0): """Returns a dict of cell data keyed by cell coordinate (row, col).""" cells = {} spreadsheet_key = self.options.google_spreadsheet_key sheets_feed = self.service.GetWorksheetsFeed(spreadsheet_key) sheet_id = sheets_feed.entry[sheet_index].id.text.split('/')[-1] cells_feed = self.service.GetCellsFeed(spreadsheet_key, sheet_id) for entry in cells_feed.entry: try: row_id = entry.cell.row col_id = entry.cell.col data = entry.content.text except Exception, inst: msg = "Couldn't read cell feed entry: %s" % inst msg += "\n%s" % entry raise Exception, msg try: row_id = int(row_id) col_id = int(col_id) except: continue cells[(row_id, col_id)] = data return cells class CkanLoader(object): """ Directs a CKAN service client to put obtained packages on CKAN. """ usage = '''usage: %prog OPTIONS''' def __init__(self): """Sets up options and init the CKAN service client.""" parser = OptionParser(self.usage) self.add_options(parser) (self.options, self.args) = parser.parse_args() self.init_ckanclient() def add_options(self, parser): """Adds options for CKAN serice location and REST API key.""" parser.add_option( '--ckan-api-location', dest='ckan_api_location', default='http://127.0.0.1:5000/api', help="""The location of working CKAN REST API.""") parser.add_option( '--ckan-api-key', dest='ckan_api_key', help="""A valid CKAN REST API key.""") parser.add_option( '--no-create-confirmation', dest='no_create_confimation', action='store_true', help="""Don't prompt for confirmation when registering a new package.""") parser.add_option( '--no-update-confirmation', dest='no_update_confimation', action='store_true', help="""Don't prompt for confirmation when updating a registered package.""") def init_ckanclient(self): """Init the CKAN client from options.""" if not self.options.ckan_api_location: print "Warning: CKAN API location not provided." if not self.options.ckan_api_key: print "Warning: CKAN API key not provided." 
self.ckanclient = CkanClient( base_location=self.options.ckan_api_location, api_key=self.options.ckan_api_key, ) def run(self): """Obtain packages and put them on CKAN.""" try: self.packages = [] self.obtain_packages() print "Putting %s packages on CKAN running at %s" % (len(self.packages), self.options.ckan_api_location) self.put_packages_on_ckan() except KeyboardInterrupt: print "" print "exiting..." print "" def obtain_packages(self): """Abstract method for obtaining packages.""" raise Exception, "Abstract method not implemented." def put_packages_on_ckan(self): """Uses CKAN client to register (or update) obtained packages.""" # Todo: Fix ckan or ckanclient, so this method isn't so long-winded. print "" sleep(1) for package in self.packages: try: registered_package = self.ckanclient.package_entity_get(package['name']) except CkanApiError: pass if self.ckanclient.last_status == 200: print "Package '%s' is already registered" % package['name'] print "" pprint.pprint(package) print "" if not self.options.no_update_confimation: answer = raw_input("Do you want to update this package with CKAN now? [y/N] ") if not answer or answer.lower()[0] != 'y': print "Skipping '%s' package..." % package['name'] print "" sleep(1) continue print "Updating package..." self.ckanclient.package_entity_put(package) if self.ckanclient.last_status == 200: print "Updated package '%s' OK." % package['name'] sleep(1) elif self.ckanclient.last_status == 403 or '403' in str(self.ckanclient.last_url_error): print "Error: Not authorised. Check your API key." sleep(1) sleep(1) sleep(1) sleep(1) elif self.ckanclient.last_http_error: print "Error: CKAN returned status code %s: %s" % ( self.ckanclient.last_status, self.ckanclient.last_http_error) sleep(1) sleep(1) sleep(1) elif self.ckanclient.last_url_error: print "Error: URL problems: %s" % self.ckanclient.last_url_error sleep(1) sleep(1) sleep(1) else: raise Exception, "Error: CKAN request didn't work at all." 
elif self.ckanclient.last_status == 404 or '404' in str(self.ckanclient.last_url_error): print "Package '%s' not currently registered" % package['name'] print "" pprint.pprint(package) print "" if not self.options.no_create_confimation: answer = raw_input("Do you want to register this package with CKAN now? [y/N] ") if not answer or answer.lower()[0] != 'y': print "Skipping '%s' package..." % package['name'] print "" sleep(1) continue print "Registering package..." self.ckanclient.package_register_post(package) if self.ckanclient.last_status in [200, 201]: print "Registered package '%s' OK." % package['name'] sleep(1) elif self.ckanclient.last_status == 403 or '403' in str(self.ckanclient.last_url_error): print "Error: Not authorised. Check your API key." sleep(1) sleep(1) sleep(1) sleep(1) elif self.ckanclient.last_http_error: print "Error: CKAN returned status code %s: %s" % ( self.ckanclient.last_status, self.ckanclient.last_http_error) sleep(1) sleep(1) sleep(1) elif self.ckanclient.last_url_error: print "Error: URL problems: %s" % self.ckanclient.last_url_error sleep(1) sleep(1) sleep(1) else: raise Exception, "Error: CKAN request didn't work at all." elif self.ckanclient.last_http_error: print "Error: CKAN returned status code %s: %s" % ( self.ckanclient.last_status, self.ckanclient.last_http_error) sleep(1) sleep(1) sleep(1) elif self.ckanclient.last_url_error: print "Error: URL problems: %s" % self.ckanclient.last_url_error sleep(1) sleep(1) sleep(1) else: raise Exception, "Error: CKAN request didn't work at all." 
def create_package(self, name, title='', url='', maintainer='', maintainer_email='', author='', author_email='', notes='', tags=[], extras={}, license_id=None, license=None, resources=[]): """Returns a CKAN REST API package from method arguments.""" if not isinstance(tags, list): raise Exception, "Package tags must be a list: %s" % tags if not isinstance(extras, dict): raise Exception, "Package extras must be a dict: %s" % tags package = {} package['name'] = self.coerce_package_name(name) package['title'] = title package['url'] = url package['notes'] = notes package['maintainer'] = maintainer package['maintainer_email'] = maintainer_email package['author'] = author package['author_email'] = author_email package['tags'] = tags package['extras'] = extras # Pre and post licenses servicization. if license_id != None: package['license_id'] = license_id elif license != None: package['license'] = license package['resources'] = resources return package def coerce_package_name(self, name): """Converts unicode string to valid CKAN package name.""" # Todo: Probably needs to be finished off. name = self.substitute_ascii_equivalents(name) name = name.lower() return name def substitute_ascii_equivalents(self, unicrap): # Method taken from: http://code.activestate.com/recipes/251871/ """This takes a UNICODE string and replaces Latin-1 characters with something equivalent in 7-bit ASCII. It returns a plain ASCII string. This function makes a best effort to convert Latin-1 characters into ASCII equivalents. It does not just strip out the Latin-1 characters. All characters in the standard 7-bit ASCII range are preserved. In the 8th bit range all the Latin-1 accented letters are converted to unaccented equivalents. Most symbol characters are converted to something meaningful. Anything not converted is deleted. 
""" xlate={0xc0:'A', 0xc1:'A', 0xc2:'A', 0xc3:'A', 0xc4:'A', 0xc5:'A', 0xc6:'Ae', 0xc7:'C', 0xc8:'E', 0xc9:'E', 0xca:'E', 0xcb:'E', 0xcc:'I', 0xcd:'I', 0xce:'I', 0xcf:'I', 0xd0:'Th', 0xd1:'N', 0xd2:'O', 0xd3:'O', 0xd4:'O', 0xd5:'O', 0xd6:'O', 0xd8:'O', 0xd9:'U', 0xda:'U', 0xdb:'U', 0xdc:'U', 0xdd:'Y', 0xde:'th', 0xdf:'ss', 0xe0:'a', 0xe1:'a', 0xe2:'a', 0xe3:'a', 0xe4:'a', 0xe5:'a', 0xe6:'ae', 0xe7:'c', 0xe8:'e', 0xe9:'e', 0xea:'e', 0xeb:'e', 0xec:'i', 0xed:'i', 0xee:'i', 0xef:'i', 0xf0:'th', 0xf1:'n', 0xf2:'o', 0xf3:'o', 0xf4:'o', 0xf5:'o', 0xf6:'o', 0xf8:'o', 0xf9:'u', 0xfa:'u', 0xfb:'u', 0xfc:'u', 0xfd:'y', 0xfe:'th', 0xff:'y', #0xa1:'!', 0xa2:'{cent}', 0xa3:'{pound}', 0xa4:'{currency}', #0xa5:'{yen}', 0xa6:'|', 0xa7:'{section}', 0xa8:'{umlaut}', #0xa9:'{C}', 0xaa:'{^a}', 0xab:'<<', 0xac:'{not}', #0xad:'-', 0xae:'{R}', 0xaf:'_', 0xb0:'{degrees}', #0xb1:'{+/-}', 0xb2:'{^2}', 0xb3:'{^3}', 0xb4:"'", #0xb5:'{micro}', 0xb6:'{paragraph}', 0xb7:'*', 0xb8:'{cedilla}', #0xb9:'{^1}', 0xba:'{^o}', 0xbb:'>>', #0xbc:'{1/4}', 0xbd:'{1/2}', 0xbe:'{3/4}', 0xbf:'?', #0xd7:'*', 0xf7:'/' } r = '' for i in unicrap: if xlate.has_key(ord(i)): r += xlate[ord(i)] elif ord(i) >= 0x80: pass else: r += str(i) return r def create_package_resource(self, url='', format='', hash='', description=''): return { 'url': url, 'format': format, 'hash': hash, 'description': description, } class AbstractGoogleSpreadsheetLoader(CkanLoader): """ Obtains packages from a Google spreadsheet and puts them on CKAN. 
""" def __init__(self): """Sets up a Google spreadsheet reader.""" super(AbstractGoogleSpreadsheetLoader, self).__init__() self.spreadsheet = GoogleSpreadsheetReader(self.options) def add_options(self, parser): """Adds options for accessing Google spreadsheet.""" super(AbstractGoogleSpreadsheetLoader, self).add_options(parser) parser.add_option( '--google-spreadsheet-key', dest='google_spreadsheet_key', help="""The projects databases metadata (a Google docs Spreadsheet key).""") parser.add_option( '--google-email', dest='google_email', help="""A Google account email address.""") parser.add_option( '--google-password', dest='google_password', help="""A Google account password for the email address.""") def obtain_packages(self): """Obtains packages from a Google spreadsheet.""" self.read_spreadsheet() self.convert_cells_to_packages() def read_spreadsheet(self): """Obtains cells from a Google spreadsheet.""" print "Reading Google spreadsheet. Please wait..." self.cells = self.spreadsheet.get_cells() def convert_cells_to_packages(self): """Abstract method for inferring CKAN packages from dict of cells.""" raise Exception, "Abstract method not implemented." class SimpleGoogleSpreadsheetLoader(AbstractGoogleSpreadsheetLoader): """ Obtains packages from a "simple" Google spreadsheet and puts them on CKAN. """ #Todo: More about what a "simple" spreadsheet consists of. HEADING_ROW_POSN = 0 FIRST_ENTITY_ROW_POSN = 1 def convert_cells_to_packages(self): """Infers CKAN packages from "simple" spreadsheet structure.""" # Discover working area. coords = self.cells.keys() coords.sort() row_ids = [i[0] for i in coords] col_ids = [i[1] for i in coords] top_left_coord = (min(row_ids), min(col_ids)) bottom_right_coord = (max(row_ids), max(col_ids)) print "Working area of spreadsheet: top-left %s; bottom-right %s." 
% (top_left_coord, bottom_right_coord) row_range = range(top_left_coord[0], bottom_right_coord[0]+1) col_range = range(top_left_coord[1], bottom_right_coord[1]+1) self.raw_entities = [] self.headings = [] # Gather headings. for col_id in col_range: row_id = row_range[self.HEADING_ROW_POSN] coord = (row_id, col_id) if coord in self.cells: heading = self.cells[coord] else: heading = "" self.headings.append(heading) print "There are %s headings: %s" % (len(self.headings), ", ".join(self.headings)) # Gather entity attributes. for row_id in row_range[self.FIRST_ENTITY_ROW_POSN:]: raw_entity = [] self.raw_entities.append(raw_entity) for col_id in col_range: coord = (row_id, col_id) if coord in self.cells: attribute = self.cells[coord] else: attribute = "" raw_entity.append(attribute) # Consolidate recorded entities. self.entities = [] for i, raw_entity in enumerate(self.raw_entities): entity = {} self.entities.append(entity) for j, value in enumerate(raw_entity): key = self.headings[j] entity[key] = value.strip() print "There are %s entities: %s" % (len(self.entities), ", ".join([self.coerce_package_name(e[self.headings[0]]) for e in self.entities])) # Construct packages. for entity in self.entities: # Why do we pop empty string? # Allow for case where '' not there if '' in entity: entity.pop('') package = self.entity_to_package(entity) if package: self.packages.append(package) print "There are %s metadata packages with titles extracted from the spreadsheet." 
% len(self.packages) def entity_to_package(self, entity): """Makes a CKAN package from "simple" spreadsheet entity.""" if 'name' in entity: package = self.create_package( name=entity.pop('name'), title=entity.pop('title', ''), url=entity.pop('url', ''), maintainer=entity.pop('maintainer', ''), maintainer_email=entity.pop('maintainer_email', ''), author=entity.pop('author', ''), author_email=entity.pop('author_email', ''), notes=entity.pop('notes', ''), tags=[tag for tag in entity.pop('tags', '').split(' ')], license_id=entity.pop('license', ''), extras=entity, ) else: package = None return package ckanclient_0.9/ckanclient/tests/000077500000000000000000000000001162027050600170335ustar00rootroot00000000000000ckanclient_0.9/ckanclient/tests/__init__.py000066400000000000000000000000001162027050600211320ustar00rootroot00000000000000ckanclient_0.9/ckanclient/tests/test_ckanclient.py000066400000000000000000000340201162027050600225560ustar00rootroot00000000000000import exceptions from nose.tools import assert_raises, assert_equal from nose.plugins.skip import SkipTest from pylons import config from ckan.tests import CkanServerCase from ckanclient import CkanClient, CkanApiError config_path = config['__file__'] class TestCkanClient(CkanServerCase): @classmethod def setup_class(self): self.pid = self._start_ckan_server() self.test_base_location = 'http://127.0.0.1:5000/api' self._wait_for_url(url=self.test_base_location) self._recreate_ckan_server_testdata(config_path) # this is api key created for tester user by create-test-data in ckan test_api_key = 'tester' test_api_key2 = 'tester2' self.c = CkanClient( base_location=self.test_base_location, api_key=test_api_key, is_verbose=True, ) self.c2 = CkanClient( base_location=self.test_base_location, api_key=test_api_key2, is_verbose=True, ) @classmethod def teardown_class(self): self._stop_ckan_server(self.pid) def delete_relationships(self): res = self.c.package_relationship_register_get('annakarenina') if self.c.last_status 
def test_01_get_locations(self):
    """Check that get_location() builds the expected URL for each location.

    Every case is a tuple of the arguments given to
    ``self.c.get_location`` and the URL it must return, expressed
    relative to ``self.test_base_location``.
    """
    rest_base = self.test_base_location + '/rest'
    search_base = self.test_base_location + '/search'
    expected_locations = [
        (('Base',), self.test_base_location),
        (('Package Register',), rest_base + '/package'),
        (('Package Entity', 'myname'), rest_base + '/package/myname'),
        (('Package Entity', 'myname', 'relationships'),
         rest_base + '/package/myname/relationships'),
        (('Package Entity', 'myname', 'relationships', 'name2'),
         rest_base + '/package/myname/relationships/name2'),
        (('Package Entity', 'myname', 'child_of', 'name2'),
         rest_base + '/package/myname/child_of/name2'),
        (('Group Register',), rest_base + '/group'),
        (('Group Entity', 'myname'), rest_base + '/group/myname'),
        (('Tag Register',), rest_base + '/tag'),
        # This case used to be checked twice in a row; once is enough.
        (('Tag Entity', 'myname'), rest_base + '/tag/myname'),
        (('Package Search',), search_base + '/package'),
    ]
    for args, expected in expected_locations:
        url = self.c.get_location(*args)
        # Report which case failed and the URL actually produced.
        assert url == expected, (args, url)
def test_06_package_register_post(self):
    """Register a brand-new package and verify it reads back as posted."""
    new_name = self._generate_pkg_name()

    # The freshly generated name must not be registered yet.
    assert_raises(CkanApiError, self.c.package_entity_get, new_name)
    code = self.c.last_status
    assert code == 404, code

    # Register the new package.
    package = {
        'name': new_name,
        'url': 'orig_url',
        'download_url': 'orig_download_url',
        'tags': ['russian', 'newtag'],
        'extras': {'genre':'thriller', 'format':'ebook'},
    }
    self.c.package_register_post(package)
    code = self.c.last_status
    assert code == 201, code

    # Read the package back and compare it field by field.
    self.c.package_entity_get(new_name)
    code = self.c.last_status
    assert code == 200, code
    fetched = self.c.last_message
    assert fetched
    assert 'name' in fetched, repr(fetched)
    assert fetched['name'] == new_name
    assert fetched['url'] == 'orig_url'
    assert fetched['download_url'] == 'orig_download_url'
    # The order of the returned tags is not guaranteed: compare as sets.
    returned_tags = fetched['tags']
    assert set(returned_tags) == set(['newtag', 'russian']), returned_tags
    assert fetched['extras'] == package['extras']
def test_08_package_entity_delete(self):
    """Delete a package and check who can still read it afterwards.

    The package is created and deleted through ``self.c``; ``self.c2``
    is the other client, expected to be refused (403) once the package
    is deleted, while ``self.c`` must still be able to read it.
    """
    # Create a package to be deleted.
    pkg_name = self._generate_pkg_name()
    self.c.package_register_post({'name': pkg_name})
    status = self.c.last_status
    assert status == 201, status

    # Check it is readable.
    self.c.package_entity_get(pkg_name)
    assert self.c.last_status == 200, self.c.last_status

    # Delete it.
    self.c.package_entity_delete(pkg_name)

    # See it is not readable by another user.  The failure message
    # reports the status of the client being asserted on (c2, not c).
    assert_raises(CkanApiError, self.c2.package_entity_get, pkg_name)
    assert self.c2.last_status == 403, self.c2.last_status

    # See it is still readable by the author (therefore pkg admin).
    self.c.package_entity_get(pkg_name)
    assert self.c.last_status == 200, self.c.last_status
def test_11_package_relationship_post(self):
    """Post a relationship between two packages and check it is created."""
    # Precondition: 'annakarenina' has no relationships to begin with.
    # The return value is not needed; the client records the outcome in
    # last_status / last_message / last_body.
    self.c.package_relationship_register_get('annakarenina')
    assert self.c.last_status == 200, self.c.last_status
    assert not self.c.last_message, self.c.last_body

    # Create relationship.
    self.c.package_relationship_entity_post('annakarenina', 'child_of', 'warandpeace', 'some comment')
    try:
        assert self.c.last_status == 201, self.c.last_status
    finally:
        # Clean up the relationship even when the assertion fails.
        self.delete_relationships()
def test_15_package_edit_form_get(self):
    """Fetch the package edit form through the form API.

    :raises SkipTest: when ``ckanext.dgu`` cannot be imported, or when
        ``dgu_form_api`` is not listed in the ``ckan.plugins`` setting.
    """
    try:
        # Imported only to find out whether ckanext-dgu is installed.
        import ckanext.dgu
    except ImportError:
        # Builtin ImportError and no "except X, e" form: valid on both
        # Python 2 and Python 3, and no unused exception variable.
        raise SkipTest('Need dgu_form_api plugin (from ckanext-dgu) installed to test form api client.')
    if 'dgu_form_api' not in config.get('ckan.plugins', ''):
        raise SkipTest('Need dgu_form_api plugin (from ckanext-dgu) enabled to test form api client.')

    res = self.c.package_edit_form_get('annakarenina')
    assert self.c.last_status == 200, self.c.last_status
    assert res, res
try:
    from setuptools import setup, find_packages
except ImportError:
    # setuptools could not be imported: call ez_setup's use_setuptools()
    # and then retry the import.
    from ez_setup import use_setuptools
    use_setuptools()
    from setuptools import setup, find_packages

from ckanclient import __version__, __description__, __long_description__, __license__
import os

# Trove classifiers passed to setup().
CLASSIFIERS = [
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Topic :: Software Development :: Libraries :: Python Modules',
]

# Install-time requirements (currently none).
INSTALL_REQUIRES = [
    # only required if python <= 2.5 (as json library in python >= 2.6)
    # 'simplejson',
]

# Package metadata comes from the ckanclient package itself.
setup(
    name='ckanclient',
    version=__version__,
    description=__description__,
    long_description=__long_description__,
    license=__license__,
    author='Appropriate Software Foundation, Open Knowledge Foundation',
    author_email='info@okfn.org',
    url='http://www.okfn.org/ckan/',
    keywords='data packaging component tool client',
    install_requires=INSTALL_REQUIRES,
    packages=find_packages(exclude=['ez_setup']),
    include_package_data=True,
    always_unzip=True,
    classifiers=CLASSIFIERS,
    test_suite='nose.collector',
)
replace with the address which should receive any error reports #email_to = you@yourdomain.com smtp_server = localhost error_email_from = paste@localhost [server:main] use = egg:Paste#http host = 0.0.0.0 port = 5000 [app:main] use = config:../ckan/test-core.ini # Logging configuration [loggers] keys = root, ckan, sqlalchemy [handlers] keys = console [formatters] keys = generic [logger_root] level = WARN handlers = console [logger_ckan] qualname = ckan handlers = level = INFO [logger_sqlalchemy] handlers = qualname = sqlalchemy.engine level = WARN [handler_console] class = StreamHandler args = (sys.stdout,) level = NOTSET formatter = generic [formatter_generic] format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s