w3lib-1.5/w3lib/util.py

import six


def str_to_unicode(text, encoding=None, errors='strict'):
    if encoding is None:
        encoding = 'utf-8'
    if isinstance(text, bytes):
        return text.decode(encoding, errors)
    return text


def unicode_to_str(text, encoding=None, errors='strict'):
    if encoding is None:
        encoding = 'utf-8'
    if isinstance(text, six.text_type):
        return text.encode(encoding, errors)
    return text


w3lib-1.5/w3lib/http.py

from base64 import urlsafe_b64encode

import six


def headers_raw_to_dict(headers_raw):
    """
    Convert raw headers (single multi-line string)
    to a dictionary.

    For example:
    >>> headers_raw_to_dict("Content-type: text/html\\n\\rAccept: gzip\\n\\n")
    {'Content-type': ['text/html'], 'Accept': ['gzip']}

    Incorrect input:
    >>> headers_raw_to_dict("Content-typt gzip\\n\\n")
    {}

    Argument is None:
    >>> headers_raw_to_dict(None)

    """
    if headers_raw is None:
        return None
    return dict([
        (header_item[0].strip(), [header_item[1].strip()])
        for header_item in [
            header.split(':', 1)
            for header in headers_raw.splitlines()]
        if len(header_item) == 2])


def headers_dict_to_raw(headers_dict):
    """
    Returns a raw HTTP headers representation of headers

    For example:
    >>> headers_dict_to_raw({'Content-type': 'text/html', 'Accept': 'gzip'})
    'Content-type: text/html\\r\\nAccept: gzip'
    >>> from twisted.python.util import InsensitiveDict
    >>> td = InsensitiveDict({'Content-type': ['text/html'], 'Accept': ['gzip']})
    >>> headers_dict_to_raw(td)
    'Content-type: text/html\\r\\nAccept: gzip'

    Argument is None:
    >>> headers_dict_to_raw(None)

    """
    if headers_dict is None:
        return None
    raw_lines = []
    for key, value in headers_dict.items():
        if isinstance(value, six.string_types):
            raw_lines.append("%s: %s" % (key, value))
        elif isinstance(value, (list, tuple)):
            for v in value:
                raw_lines.append("%s: %s" % (key, v))
    return '\r\n'.join(raw_lines)


def basic_auth_header(username, password):
    """Return an `Authorization` header for HTTP Basic Access
    Authentication (RFC 2617)"""
    auth = "%s:%s" % (username, password)
    if not isinstance(auth, bytes):
        # XXX: RFC 2617 doesn't define an encoding, but ISO-8859-1
        # seems to be the most widely used encoding here. See also:
        # http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html
        auth = auth.encode('ISO-8859-1')
    return 'Basic ' + urlsafe_b64encode(auth).decode('ascii')
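
# A minimal usage sketch for the helpers above (not part of the released
# module); the header values and credentials are made up for illustration.
if __name__ == '__main__':
    raw = "Content-type: text/html\n\rAccept: gzip\n\n"
    headers = headers_raw_to_dict(raw)
    print(headers)                       # {'Content-type': ['text/html'], 'Accept': ['gzip']}
    print(headers_dict_to_raw(headers))  # the same header lines, joined with CRLF
    print(basic_auth_header('user', 's3cret'))  # Basic dXNlcjpzM2NyZXQ=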

w3lib-1.5/w3lib/encoding.py

"""
Functions for handling encoding of web pages
"""
import re, codecs, encodings

_HEADER_ENCODING_RE = re.compile(r'charset=([\w-]+)', re.I)


def http_content_type_encoding(content_type):
    """Extract the encoding in the content-type header"""
    if content_type:
        match = _HEADER_ENCODING_RE.search(content_type)
        if match:
            return resolve_encoding(match.group(1))

# regexp for parsing HTTP meta tags
_TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?'''
_HTTPEQUIV_RE = _TEMPLATE % ('http-equiv', 'Content-Type')
_CONTENT_RE = _TEMPLATE % ('content', r'(?P<mime>[^;]+);\s*charset=(?P<charset>[\w-]+)')
_CONTENT2_RE = _TEMPLATE % ('charset', r'(?P<charset2>[\w-]+)')
_XML_ENCODING_RE = _TEMPLATE % ('encoding', r'(?P<xmlcharset>[\w-]+)')

# check for meta tags, or xml decl. and stop search if a body tag is encountered
_BODY_ENCODING_PATTERN = r'<\s*(?:meta(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)' % \
        (_HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE)
_BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I)
_BODY_ENCODING_BYTES_RE = re.compile(_BODY_ENCODING_PATTERN.encode('ascii'), re.I)


def html_body_declared_encoding(html_body_str):
    """Return the encoding specified in meta tags in the html body,
    or None if no suitable encoding was found
    """
    # html5 suggests the first 1024 bytes are sufficient, we allow for more
    chunk = html_body_str[:4096]
    if isinstance(chunk, bytes):
        match = _BODY_ENCODING_BYTES_RE.search(chunk)
    else:
        match = _BODY_ENCODING_STR_RE.search(chunk)
    if match:
        encoding = match.group('charset') or match.group('charset2') \
                or match.group('xmlcharset')
        if encoding:
            return resolve_encoding(encoding)

# Default encoding translation
# this maps canonicalized encodings to target encodings
# see http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character-encodings-0
# in addition, gb18030 supersedes gb2312 & gbk
# the keys are converted using _c18n_encoding and in sorted order
DEFAULT_ENCODING_TRANSLATION = {
    'ascii': 'cp1252',
    'euc_kr': 'cp949',
    'gb2312': 'gb18030',
    'gb_2312_80': 'gb18030',
    'gbk': 'gb18030',
    'iso8859_11': 'cp874',
    'iso8859_9': 'cp1254',
    'latin_1': 'cp1252',
    'macintosh': 'mac_roman',
    'shift_jis': 'cp932',
    'tis_620': 'cp874',
    'win_1251': 'cp1251',
    'windows_31j': 'cp932',
    'win_31j': 'cp932',
    'windows_874': 'cp874',
    'win_874': 'cp874',
    'x_sjis': 'cp932',
    'zh_cn': 'gb18030'
}


def _c18n_encoding(encoding):
    """Canonicalize an encoding name

    This performs normalization and translates aliases using python's
    encoding aliases
    """
    normed = encodings.normalize_encoding(encoding).lower()
    return encodings.aliases.aliases.get(normed, normed)


def resolve_encoding(encoding_alias):
    """Return the encoding that the given encoding alias maps to, or None
    if the encoding cannot be interpreted
    """
    c18n_encoding = _c18n_encoding(encoding_alias)
    translated = DEFAULT_ENCODING_TRANSLATION.get(c18n_encoding, c18n_encoding)
    try:
        return codecs.lookup(translated).name
    except LookupError:
        return None

_BOM_TABLE = [
    (codecs.BOM_UTF32_BE, 'utf-32-be'),
    (codecs.BOM_UTF32_LE, 'utf-32-le'),
    (codecs.BOM_UTF16_BE, 'utf-16-be'),
    (codecs.BOM_UTF16_LE, 'utf-16-le'),
    (codecs.BOM_UTF8, 'utf-8')
]
_FIRST_CHARS = set(c[0] for (c, _) in _BOM_TABLE)


def read_bom(data):
    """Read the byte order mark in the text, if present, and return the
    encoding represented by the BOM and the BOM itself.

    If no BOM can be detected, (None, None) is returned.
    """
    # common case is no BOM, so this is fast
    if data and data[0] in _FIRST_CHARS:
        for bom, encoding in _BOM_TABLE:
            if data.startswith(bom):
                return encoding, bom
    return None, None

# Python decoder doesn't follow unicode standard when handling
# bad utf-8 encoded strings. see http://bugs.python.org/issue8271
codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.start+1))


def to_unicode(data_str, encoding):
    """Convert a str object to unicode using the encoding given

    Characters that cannot be converted will be converted to '\ufffd' (the
    unicode replacement character).
    """
    return data_str.decode(encoding, 'w3lib_replace')


def html_to_unicode(content_type_header, html_body_str,
        default_encoding='utf8', auto_detect_fun=None):
    """Convert raw html bytes to unicode

    This attempts to make a reasonable guess at the content encoding of the
    html body, following a similar process to a web browser.

    It will try in order:
    * http content type header
    * BOM (byte-order mark)
    * meta or xml tag declarations
    * auto-detection, if the `auto_detect_fun` keyword argument is not None
    * default encoding in keyword arg (which defaults to utf8)

    If an encoding other than the auto-detected or default encoding is used,
    overrides will be applied, converting some character encodings to more
    suitable alternatives.

    If a BOM is found matching the encoding, it will be stripped.

    The `auto_detect_fun` argument can be used to pass a function that will
    sniff the encoding of the text. This function must take the raw text as an
    argument and return the name of an encoding that python can process, or
    None. To use chardet, for example, you can define the function as:
        auto_detect_fun=lambda x: chardet.detect(x).get('encoding')
    or to use UnicodeDammit (shipped with the BeautifulSoup library):
        auto_detect_fun=lambda x: UnicodeDammit(x).originalEncoding

    If the locale of the website or user language preference is known, then a
    better default encoding can be supplied.

    If the content type header is not present, None can be passed signifying
    that the header was not present.

    This method will not fail: if characters cannot be converted to unicode,
    '\ufffd' (the unicode replacement character) will be inserted instead.

    Returns a tuple of (encoding used, unicode)
    """
    enc = http_content_type_encoding(content_type_header)
    bom_enc, bom = read_bom(html_body_str)
    if enc is not None:
        # remove BOM if it agrees with the encoding
        if enc == bom_enc:
            html_body_str = html_body_str[len(bom):]
        elif enc == 'utf-16' or enc == 'utf-32':
            # read endianness from BOM, or default to big endian
            # tools.ietf.org/html/rfc2781 section 4.3
            if bom_enc is not None and bom_enc.startswith(enc):
                enc = bom_enc
                html_body_str = html_body_str[len(bom):]
            else:
                enc += '-be'
        return enc, to_unicode(html_body_str, enc)
    if bom_enc is not None:
        return bom_enc, to_unicode(html_body_str[len(bom):], bom_enc)
    enc = html_body_declared_encoding(html_body_str)
    if enc is None and (auto_detect_fun is not None):
        enc = auto_detect_fun(html_body_str)
    if enc is None:
        enc = default_encoding
    return enc, to_unicode(html_body_str, enc)
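
# A minimal usage sketch for html_to_unicode (not part of the released
# module); the Content-Type header and HTML snippet are made up for
# illustration.
if __name__ == '__main__':
    body = b'<html><head><meta charset="cp1252"></head><body>\xa3100</body></html>'
    # The content-type header wins over the meta declaration when both are present.
    print(html_to_unicode('text/html; charset=ISO-8859-1', body))
    # With no header, the meta charset declaration in the body is used instead.
    print(html_to_unicode(None, body))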

w3lib-1.5/w3lib/form.py

import six

if six.PY2:
    from cStringIO import StringIO as BytesIO
else:
    from io import BytesIO

from w3lib.util import unicode_to_str


def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    Data is a dict where keys are the field name, and values are either
    strings or tuples (filename, content) for file uploads.

    This code is based on distutils.command.upload.

    Returns a (body, boundary) tuple where ``body`` is the binary body value,
    and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, value in data.items():
        title = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if type(value) != type([]):
            value = [value]
        for value in value:
            if type(value) is tuple:
                title += u'; filename="%s"' % value[0]
                value = value[1]
            else:
                value = unicode_to_str(value)  # in distutils: str(value).encode('utf-8')
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(value)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
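
# A minimal usage sketch for encode_multipart (not part of the released
# module); field names and file content are made up, and running this file
# directly assumes the w3lib package is importable.
if __name__ == '__main__':
    payload = {
        'name': u'John',
        'attachment': ('hello.txt', b'Hello, world!\n'),
    }
    body, boundary = encode_multipart(payload)
    # The boundary belongs in the request header, e.g.
    # 'Content-Type: multipart/form-data; boundary=' + boundary
    print(boundary)
    print(body)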
""" # Build up the MIME payload for the POST data boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' sep_boundary = b'\r\n--' + boundary.encode('ascii') end_boundary = sep_boundary + b'--' body = BytesIO() for key, value in data.items(): title = u'\r\nContent-Disposition: form-data; name="%s"' % key # handle multiple entries for the same name if type(value) != type([]): value = [value] for value in value: if type(value) is tuple: title += u'; filename="%s"' % value[0] value = value[1] else: value = unicode_to_str(value) # in distutils: str(value).encode('utf-8') body.write(sep_boundary) body.write(title.encode('utf-8')) body.write(b"\r\n\r\n") body.write(value) body.write(end_boundary) body.write(b"\r\n") return body.getvalue(), boundary w3lib-1.5/w3lib/url.py0000644000015400001640000001243212237365612016231 0ustar buildbotbuildbot00000000000000""" This module contains general purpose URL functions not found in the standard library. """ import os import re import posixpath import warnings from six import moves from w3lib.util import unicode_to_str # Python 2.x urllib.always_safe become private in Python 3.x; # its content is copied here _ALWAYS_SAFE_BYTES = (b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' b'abcdefghijklmnopqrstuvwxyz' b'0123456789' b'_.-') def urljoin_rfc(base, ref, encoding='utf-8'): """Same as urlparse.urljoin but supports unicode values in base and ref parameters (in which case they will be converted to str using the given encoding). Always returns a str. """ warnings.warn("w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead", DeprecationWarning) str_base = unicode_to_str(base, encoding) str_ref = unicode_to_str(ref, encoding) return moves.urllib.parse.urljoin(str_base, str_ref) _reserved = b';/?:@&=+$|,#' # RFC 3986 (Generic Syntax) _unreserved_marks = b"-_.!~*'()" # RFC 3986 sec 2.3 _safe_chars = _ALWAYS_SAFE_BYTES + b'%' + _reserved + _unreserved_marks def safe_url_string(url, encoding='utf8'): """Convert the given url into a legal URL by escaping unsafe characters according to RFC-3986. If a unicode url is given, it is first converted to str using the given encoding (which defaults to 'utf-8'). When passing a encoding, you should use the encoding of the original page (the page from which the url was extracted from). Calling this function on an already "safe" url will return the url unmodified. Always returns a str. """ s = unicode_to_str(url, encoding) return moves.urllib.parse.quote(s, _safe_chars) _parent_dirs = re.compile(r'/?(\.\./)+') def safe_download_url(url): """ Make a url for download. This will call safe_url_string and then strip the fragment, if one exists. The path will be normalised. If the path is outside the document root, it will be changed to be within the document root. 
""" safe_url = safe_url_string(url) scheme, netloc, path, query, _ = moves.urllib.parse.urlsplit(safe_url) if path: path = _parent_dirs.sub('', posixpath.normpath(path)) if url.endswith('/') and not path.endswith('/'): path += '/' else: path = '/' return moves.urllib.parse.urlunsplit((scheme, netloc, path, query, '')) def is_url(text): return text.partition("://")[0] in ('file', 'http', 'https') def url_query_parameter(url, parameter, default=None, keep_blank_values=0): """Return the value of a url parameter, given the url and parameter name""" queryparams = moves.urllib.parse.parse_qs( moves.urllib.parse.urlsplit(str(url))[3], keep_blank_values=keep_blank_values ) return queryparams.get(parameter, [default])[0] def url_query_cleaner(url, parameterlist=(), sep='&', kvsep='=', remove=False, unique=True): """Clean url arguments leaving only those passed in the parameterlist keeping order If remove is True, leave only those not in parameterlist. If unique is False, do not remove duplicated keys """ url = moves.urllib.parse.urldefrag(url)[0] base, _, query = url.partition('?') seen = set() querylist = [] for ksv in query.split(sep): k, _, _ = ksv.partition(kvsep) if unique and k in seen: continue elif remove and k in parameterlist: continue elif not remove and k not in parameterlist: continue else: querylist.append(ksv) seen.add(k) return '?'.join([base, sep.join(querylist)]) if querylist else base def add_or_replace_parameter(url, name, new_value, sep='&', url_is_quoted=False): """Add or remove a parameter to a given url""" def has_querystring(url): _, _, _, query, _ = moves.urllib.parse.urlsplit(url) return bool(query) parameter = url_query_parameter(url, name, keep_blank_values=1) if url_is_quoted: parameter = moves.urllib.parse.quote(parameter) if parameter is None: if has_querystring(url): next_url = url + sep + name + '=' + new_value else: next_url = url.rstrip('?') + '?' 

w3lib-1.5/w3lib/__init__.py


w3lib-1.5/w3lib/html.py

"""
Functions for dealing with markup text
"""
import re
import six
from six import moves

from w3lib.util import str_to_unicode, unicode_to_str
from w3lib.url import safe_url_string

_ent_re = re.compile(r'&(#?(x?))([^&;\s]+);')
_tag_re = re.compile(r'<[a-zA-Z\/!].*?>', re.DOTALL)
_baseurl_re = re.compile(six.u(r'<base\s+href\s*=\s*[\"\']\s*([^\"\'\s]+)\s*[\"\']'), re.I)
_meta_refresh_re = re.compile(six.u(r'<meta\s[^>]*http-equiv[^>]*refresh[^>]*content\s*=\s*(?P<quote>["\'])(?P<int>(\d*\.)?\d+)\s*;\s*url=(?P<url>.*?)(?P=quote)'), re.DOTALL | re.IGNORECASE)
_cdata_re = re.compile(r'((?P<cdata_s><!\[CDATA\[)(?P<cdata_d>.*?)(?P<cdata_e>\]\]>))', re.DOTALL)


def remove_entities(text, keep=(), remove_illegal=True, encoding='utf-8'):
    """Remove entities from the given text by converting them to the
    corresponding unicode character.

    'text' can be a unicode string or a byte string encoded in the given
    `encoding` (which defaults to 'utf-8').

    If 'keep' is passed (with a list of entity names) those entities will
    be kept (they won't be removed).

    It supports both numeric entities (&#nnnn; and &#hhhh;) and named
    entities (&nbsp; &gt;).

    If remove_illegal is True, entities that can't be converted are removed.
    If remove_illegal is False, entities that can't be converted are kept "as
    is". For more information see the tests.

    Always returns a unicode string (with the entities removed).
    """

    def convert_entity(m):
        entity_body = m.group(3)
        if m.group(1):
            try:
                if m.group(2):
                    number = int(entity_body, 16)
                else:
                    number = int(entity_body, 10)
                # Numeric character references in the 80-9F range are typically
                # interpreted by browsers as representing the characters mapped
                # to bytes 80-9F in the Windows-1252 encoding. For more info
                # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
                if 0x80 <= number <= 0x9f:
                    return six.int2byte(number).decode('cp1252')
            except ValueError:
                number = None
        else:
            if entity_body in keep:
                return m.group(0)
            else:
                number = moves.html_entities.name2codepoint.get(entity_body)
        if number is not None:
            try:
                return six.unichr(number)
            except ValueError:
                pass
        return u'' if remove_illegal else m.group(0)

    return _ent_re.sub(convert_entity, str_to_unicode(text, encoding))


def has_entities(text, encoding=None):
    return bool(_ent_re.search(str_to_unicode(text, encoding)))


def replace_tags(text, token='', encoding=None):
    """Replace all markup tags found in the given text by the given token.

    By default the token is an empty string, so all tags are simply removed.

    'text' can be a unicode string or a regular string encoded as 'utf-8'.

    Always returns a unicode string.
    """
    return _tag_re.sub(token, str_to_unicode(text, encoding))
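
# A few illustrative calls for the helpers above (not from the original
# module); shown doctest-style with made-up input.
#
# >>> remove_entities(u'x &amp; y &gt; z')
# 'x & y > z'
# >>> remove_entities(u'&amp; &gt;', keep=['amp'])
# '&amp; >'
# >>> replace_tags(u'<b>Hello</b> world')
# 'Hello world'
# >>> replace_tags(u'<b>Hello</b>', token='*')
# '*Hello*'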
_REMOVECOMMENTS_RE = re.compile(u'<!--.*?-->', re.DOTALL)


def remove_comments(text, encoding=None):
    """ Remove HTML Comments. """
    text = str_to_unicode(text, encoding)
    return _REMOVECOMMENTS_RE.sub(u'', text)


def remove_tags(text, which_ones=(), keep=(), encoding=None):
    """ Remove HTML Tags only.

    which_ones and keep are both tuples, there are four cases:
        which_ones, keep (1 - not empty, 0 - empty)
        1, 0 - remove all tags in which_ones
        0, 1 - remove all tags except the ones in keep
        0, 0 - remove all tags
        1, 1 - not allowed
    """
    assert not (which_ones and keep), 'which_ones and keep can not be given at the same time'

    def will_remove(tag):
        if which_ones:
            return tag in which_ones
        else:
            return tag not in keep

    def remove_tag(m):
        tag = m.group(1)
        return u'' if will_remove(tag) else m.group(0)

    regex = '</?([^ >/]+).*?>'
    retags = re.compile(regex, re.DOTALL | re.IGNORECASE)

    return retags.sub(remove_tag, str_to_unicode(text, encoding))


def remove_tags_with_content(text, which_ones=(), encoding=None):
    """ Remove tags and their content.

    which_ones -- is a tuple of the tags to remove along with their content.
                  If it is empty, do nothing.
    """
    text = str_to_unicode(text, encoding)
    if which_ones:
        tags = '|'.join([r'<%s.*?</%s>|<%s\s*/>' % (tag, tag, tag) for tag in which_ones])
        retags = re.compile(tags, re.DOTALL | re.IGNORECASE)
        text = retags.sub(u'', text)
    return text


def replace_escape_chars(text, which_ones=('\n', '\t', '\r'), replace_by=u'',
        encoding=None):
    """ Remove escape chars. Default: \\n, \\t, \\r

    which_ones -- is a tuple of which escape chars we want to remove.
                  By default removes \\n, \\t, \\r.

    replace_by -- text to replace the escape chars with.
                  It defaults to '', so the escape chars are removed.
    """
    text = str_to_unicode(text, encoding)
    for ec in which_ones:
        text = text.replace(ec, str_to_unicode(replace_by, encoding))
    return text


def unquote_markup(text, keep=(), remove_illegal=True, encoding=None):
    """
    This function receives markup as a text (always a unicode string or
    a utf-8 encoded string) and does the following:

    - removes entities (except the ones in 'keep') from any part of it
      that is not inside a CDATA
    - searches for CDATAs and extracts their text (if any) without modifying it
    - removes the found CDATAs
    """

    def _get_fragments(txt, pattern):
        offset = 0
        for match in pattern.finditer(txt):
            match_s, match_e = match.span(1)
            yield txt[offset:match_s]
            yield match
            offset = match_e
        yield txt[offset:]

    text = str_to_unicode(text, encoding)
    ret_text = u''
    for fragment in _get_fragments(text, _cdata_re):
        if isinstance(fragment, six.string_types):
            # it's not a CDATA (so we try to remove its entities)
            ret_text += remove_entities(fragment, keep=keep, remove_illegal=remove_illegal)
        else:
            # it's a CDATA (so we just extract its content)
            ret_text += fragment.group('cdata_d')
    return ret_text


def get_base_url(text, baseurl='', encoding='utf-8'):
    """Return the base url if declared in the given html text, relative to
    the given base url.

    If no base url is found, the given base url is returned.
    """
    text = str_to_unicode(text, encoding)
    baseurl = unicode_to_str(baseurl, encoding)
    m = _baseurl_re.search(text)
    if m:
        baseurl = moves.urllib.parse.urljoin(baseurl, m.group(1).encode(encoding))
    return safe_url_string(baseurl)


def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Parse the http-equiv refresh parameter of the HTML meta element in the
    given HTML text and return a tuple (interval, url), where interval is a
    number giving the delay in seconds and url is a string with the absolute
    url to redirect to.

    If no meta redirect is found, (None, None) is returned.
    """
    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)
    try:
        text = str_to_unicode(text, encoding)
    except UnicodeDecodeError:
        print(text)
        raise
    text = remove_comments(remove_entities(text))
    m = _meta_refresh_re.search(text)
    if m:
        interval = float(m.group('int'))
        url = safe_url_string(m.group('url').strip(' "\''), encoding)
        url = moves.urllib.parse.urljoin(baseurl, url)
        return interval, url
    else:
        return None, None
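
# A minimal usage sketch for the tag and meta-refresh helpers (not part of
# the released module); the HTML snippets are made up, and running this file
# directly assumes the w3lib package is importable.
if __name__ == '__main__':
    html = u'<div><p>Hello <b>world</b></p><script>alert(1)</script></div>'
    print(remove_tags(html, which_ones=('b',)))
    # '<div><p>Hello world</p><script>alert(1)</script></div>'
    print(remove_tags_with_content(html, which_ones=('script',)))
    # '<div><p>Hello <b>world</b></p></div>'
    print(get_meta_refresh(
        u'<html><head><meta http-equiv="refresh" content="5; url=http://example.com/"></head></html>'))
    # (5.0, 'http://example.com/')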

w3lib-1.5/w3lib.egg-info/dependency_links.txt


w3lib-1.5/w3lib.egg-info/top_level.txt

w3lib


w3lib-1.5/w3lib.egg-info/SOURCES.txt

setup.py
w3lib/__init__.py
w3lib/encoding.py
w3lib/form.py
w3lib/html.py
w3lib/http.py
w3lib/url.py
w3lib/util.py
w3lib.egg-info/PKG-INFO
w3lib.egg-info/SOURCES.txt
w3lib.egg-info/dependency_links.txt
w3lib.egg-info/requires.txt
w3lib.egg-info/top_level.txt


w3lib-1.5/w3lib.egg-info/requires.txt

six >= 1.4.1


w3lib-1.5/w3lib.egg-info/PKG-INFO

Metadata-Version: 1.1
Name: w3lib
Version: 1.5
Summary: Library of web-related functions
Home-page: https://github.com/scrapy/w3lib
Author: Scrapy project
Author-email: info@scrapy.org
License: BSD
Description: UNKNOWN
Platform: Any
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP


w3lib-1.5/setup.py

from setuptools import setup


setup(
    name='w3lib',
    version='1.5',
    license='BSD',
    description='Library of web-related functions',
    author='Scrapy project',
    author_email='info@scrapy.org',
    url='https://github.com/scrapy/w3lib',
    packages=['w3lib'],
    platforms=['Any'],
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'License :: OSI Approved :: BSD License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy',
        'Topic :: Internet :: WWW/HTTP',
    ],
    install_requires=['six >= 1.4.1'],
)

w3lib-1.5/PKG-INFO

Metadata-Version: 1.1
Name: w3lib
Version: 1.5
Summary: Library of web-related functions
Home-page: https://github.com/scrapy/w3lib
Author: Scrapy project
Author-email: info@scrapy.org
License: BSD
Description: UNKNOWN
Platform: Any
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP


w3lib-1.5/setup.cfg

[egg_info]
tag_build = 
tag_date = 0
tag_svn_revision = 0