pax_global_header00006660000000000000000000000064133757624260014531gustar00rootroot0000000000000052 comment=5ae25259a83108673e317b00674e217b9b8fbb6e serverfiles-0.3.0/000077500000000000000000000000001337576242600140625ustar00rootroot00000000000000serverfiles-0.3.0/.gitignore000066400000000000000000000000201337576242600160420ustar00rootroot00000000000000*.egg-info dist serverfiles-0.3.0/LICENSE.txt000066400000000000000000000005721337576242600157110ustar00rootroot00000000000000Copyright (c) 2016 Bioinformatics Laboratory, University of Ljubljana, Faculty of Computer and Information Science All rights reserved. THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT ANY WARRANTY WHATSOEVER. If you use or redistribute this software, you are permitted to do so under the terms of GNU [GPL-3.0]+ license. [GPL-3.0]: https://www.gnu.org/licenses/gpl-3.0.en.html serverfiles-0.3.0/MANIFEST.in000066400000000000000000000001721337576242600156200ustar00rootroot00000000000000include LICENSE.txt recursive-include doc *.rst Makefile *.bat *.py recursive-include tests *.py include requirements.txt serverfiles-0.3.0/README.txt000066400000000000000000000001231337576242600155540ustar00rootroot00000000000000An utility that accesses files on a HTTP server and stores them locally for reuse. serverfiles-0.3.0/doc/000077500000000000000000000000001337576242600146275ustar00rootroot00000000000000serverfiles-0.3.0/doc/.gitignore000066400000000000000000000000061337576242600166130ustar00rootroot00000000000000build serverfiles-0.3.0/doc/Makefile000066400000000000000000000011431337576242600162660ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = python -msphinx SPHINXPROJ = ServerFiles SOURCEDIR = . BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) serverfiles-0.3.0/doc/conf.py000066400000000000000000000121051337576242600161250ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # ServerFiles documentation build configuration file, created by # sphinx-quickstart on Thu Sep 21 15:55:06 2017. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # # import os # import sys # sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['sphinx.ext.autodoc'] # Add any paths that contain templates here, relative to this directory. templates_path = ['templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # General information about the project. project = 'ServerFiles' copyright = '2017, Biolab' author = 'Biolab' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '0.2' # The full version, including alpha/beta/rc tags. release = '0.2' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ['build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { '**': [ 'about.html', 'navigation.html', 'relations.html', # needs 'show_related': True theme option to display 'searchbox.html', 'donate.html', ] } # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'ServerFilesdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'ServerFiles.tex', 'ServerFiles Documentation', 'Biolab', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'serverfiles', 'ServerFiles Documentation', [author], 1) ] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'ServerFiles', 'ServerFiles Documentation', author, 'ServerFiles', 'One line description of project.', 'Miscellaneous'), ] serverfiles-0.3.0/doc/index.rst000066400000000000000000000007171337576242600164750ustar00rootroot00000000000000.. ServerFiles documentation master file, created by sphinx-quickstart on Thu Sep 21 15:55:06 2017. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to ServerFiles's documentation! ======================================= .. toctree:: :maxdepth: 2 :caption: Contents: serverfiles Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` serverfiles-0.3.0/doc/make.bat000066400000000000000000000014051337576242600162340ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=python -msphinx ) set SOURCEDIR=. set BUILDDIR=_build set SPHINXPROJ=ServerFiles if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The Sphinx module was not found. Make sure you have Sphinx installed, echo.then set the SPHINXBUILD environment variable to point to the full echo.path of the 'sphinx-build' executable. Alternatively you may add the echo.Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd serverfiles-0.3.0/doc/serverfiles.rst000066400000000000000000000000651337576242600177130ustar00rootroot00000000000000ServerFiles ----------- .. automodule:: serverfiles serverfiles-0.3.0/requirements.txt000066400000000000000000000000211337576242600173370ustar00rootroot00000000000000requests>=2.11.1 serverfiles-0.3.0/serverfiles/000077500000000000000000000000001337576242600164135ustar00rootroot00000000000000serverfiles-0.3.0/serverfiles/__init__.py000066400000000000000000000400161337576242600205250ustar00rootroot00000000000000""" Access and store files when needed. Server with files ================= Server provides files through HTTP. Any HTTP server that can serve static files can work, including Apache, Nginx and Python's HTTP server. Files can be organized in subfolders. Each file can have a corresponding info file (with .info extension). A test server could be made by just creating a new empty folder and creating a subfolder "additional-data" there with the following files:: additional-data/a-very-big-file.txt additional-data/a-very-big-file.txt.info Our .info file should contain the following:: {"tags": [ "huge file", "example" ], "datetime": "2016-10-10 11:39:07"} Then we can start a test server with:: python -m http.server To access the server and download the file we could use:: >>> import serverfiles >>> sf = serverfiles.ServerFiles(server="http://localhost:8000/") >>> sf.listfiles() [('additional-data', 'a-very-big-file.txt')] >>> lf = serverfiles.LocalFiles("sftest", serverfiles=sf) >>> lf.download('additional-data', 'a-very-big-file.txt') Info files =========== Info files, which have an additional .info extension, must be SON dictionaries. Keys that are read by this module are: * datetime ("%Y-%m-%d %H:%M:%S"), * compression (if set, the file is uncompressed automatically, can be one of .bz2, .gz, .tar.gz, .tar.bz2), * and tags (a list of strings). Server query optimization ========================= A server can contain a __INFO__ file in its root folder. This file is a JSON list, whose elements are lists of [ list-of-path, info dictionary ]. If such file exists its contents will be used instead of server queries for file listing and info lookup, which is critical for high latency connections. Such file can be prepared as: >>> sf = ServerFiles(server="yourserver") >>> json.dump(list(sf.allinfo().items()), open("__INFO__", "wt")) If your server already has an __INFO__ file, the above code will just get its contents. Remote files ============ .. autoclass:: ServerFiles :members: Local files =========== .. autoclass:: LocalFiles :members: """ import functools try: import urllib.parse as urlparse except ImportError: import urlparse from contextlib import contextmanager import threading import os import tarfile import gzip import bz2 import datetime import tempfile import json try: from html.parser import HTMLParser except ImportError: from HTMLParser import HTMLParser import shutil import requests import requests.exceptions try: FileNotFoundError except: FileNotFoundError = IOError # default socket timeout in seconds TIMEOUT = 5 def _open_file_info(fname): with open(fname, 'rt') as f: return json.load(f) def _save_file_info(fname, info): with open(fname, 'wt') as f: json.dump(info, f) def _create_path(target): try: os.makedirs(target) except OSError: pass def _is_prefix(pref, whole): if len(pref) > len(whole): return False for a, b in zip(pref, whole): if a != b: return False return True class _FindLinksParser(HTMLParser, object): def __init__(self): super(_FindLinksParser, self).__init__() self.links = [] def handle_starttag(self, tag, attrs): if tag == "a": for name, value in attrs: if name == "href": #ignore navidation and hidden files if value.startswith("?") or value.startswith("/") or \ value.startswith(".") or value.startswith("__"): continue self.links.append(urlparse.unquote(value)) class ServerFiles: """A class for listing or downloading files from the server.""" def __init__(self, server, username=None, password=None): if server.endswith('/'): self.server = server else: self.server = server + '/' """Server URL.""" self.username = username """Username for authenticated HTTP queried.""" self.password = password """Password for authenticated HTTP queried.""" self.req = requests.Session() a = requests.adapters.HTTPAdapter(max_retries=2) self.req.mount('https://', a) self.req.mount('http://', a) # cached info for all files on server # None is not loaded, False if it does not exist self._info = None def _download_server_info(self): if self._info is None: t = self._open("__INFO__") if t.status_code == 200: self._info = {tuple(a): b for a, b in json.loads(t.text)} else: self._info = False #do not check again def listfiles(self, *args, **kwargs): """Return a list of files on the server. Do not list .info files.""" recursive = kwargs.get("recursive", True) self._download_server_info() if self._info: return [a for a in self._info.keys() if _is_prefix(args, a)] text = self._open(*args).text parser = _FindLinksParser() parser.feed(text) links = parser.links files = [args + (f,) for f in links if not f.endswith("/") and not f.endswith(".info")] if recursive: for f in links: if f.endswith("/"): f = f.strip("/") nargs = args + (f,) files.extend([a for a in self.listfiles(*nargs, recursive=True)]) return files def download(self, *path, **kwargs): """ Download a file and name it with target name. Callback is called once for each downloaded percentage. """ callback = kwargs.get("callback", None) target = kwargs.get("target", None) _create_path(os.path.dirname(target)) req = self._open(*path) if req.status_code == 404: raise FileNotFoundError elif req.status_code != 200: raise IOError size = req.headers.get('content-length') if size: size = int(size) f = tempfile.TemporaryFile() chunksize = 1024*8 lastchunkreport= 0.0001 readb = 0 for buf in req.iter_content(chunksize): readb += len(buf) while size and float(readb) / size > lastchunkreport+0.01: lastchunkreport += 0.01 if callback: callback() f.write(buf) f.seek(0) with open(target, "wb") as fo: shutil.copyfileobj(f, fo) if callback and not size: #size was unknown, call callbacks for i in range(99): callback() if callback: callback() def allinfo(self, *path, **kwargs): """Return all info files in a dictionary, where keys are paths.""" recursive = kwargs.get("recursive", True) self._download_server_info() files = self.listfiles(*path, recursive=recursive) infos = {} for npath in files: infos[npath] = self.info(*npath) return infos def search(self, sstrings, **kwargs): """ Search for files on the repository where all substrings in a list are contained in at least one choosen field (tag, title, name). Return a list of tuples: first tuple element is the file's domain, second its name. As for now the search is performed locally, therefore information on files in repository is transfered on first call of this function. """ if self._info is None or self._info is False: self._info = self.allinfo() return _search(self._info, sstrings, **kwargs) def info(self, *path): """Return a dictionary containing repository file info.""" self._download_server_info() if self._info: return self._info.get(path, {}) path = list(path) path[-1] += ".info" t = self._open(*path) if t.status_code == 200: return json.loads(t.text) else: return {} def _server_request(self, root, *path): auth = None if self.username and self.password: auth = (self.username, self.password) return self.req.get(root + "/".join(path), auth=auth, timeout=TIMEOUT, stream=True) def _open(self, *args): return self._server_request(self.server, *args) def _keyed_lock(lock_constructor=threading.Lock): lock = threading.Lock() locks = {} def get_lock(key): with lock: if key not in locks: locks[key] = lock_constructor() return locks[key] return get_lock #using RLock instead of Ales's Orange 2 solution _get_lock = _keyed_lock(threading.RLock) def _split_path(head): out = [] while True: head, tail = os.path.split(head) out.insert(0, tail) if not head: break return out class LocalFiles: """Manage local files.""" def __init__(self, path, serverfiles=None): self.serverfiles_dir = path """A folder downloaded files are stored in.""" _create_path(self.serverfiles_dir) self.serverfiles = serverfiles """A ServerFiles instance.""" @contextmanager def _lock_file(self, *args): path = self.localpath(*args) path = os.path.normpath(os.path.realpath(path)) lock = _get_lock(path) lock.acquire(True) try: yield finally: lock.release() def _locked(f): @functools.wraps(f) def func(self, *path, **kwargs): with self._lock_file(*path): return f(self, *path, **kwargs) func.unwrapped = f return func def localpath(self, *args): """ Return the local location for a file. """ return os.path.join(os.path.expanduser(self.serverfiles_dir), *args) @_locked def download(self, *path, **kwargs): """Download file from the repository. Callback can be a function without arguments and will be called once for each downloaded percent of file: 100 times for the whole file. If extract is True, files marked as compressed will be uncompressed after download.""" extract = kwargs.get("extract", True) callback = kwargs.get("callback", None) info = self.serverfiles.info(*path) extract = extract and "compression" in info target = self.localpath(*path) self.serverfiles.download(*path, target=target + ".tmp" if extract else target, callback=callback) _save_file_info(target + '.info', info) if extract: if info.get("compression") in ["tar.gz", "tar.bz2"]: f = tarfile.open(target + ".tmp") try: os.mkdir(target) except OSError: pass f.extractall(target) elif info.get("compression") == "gz": f = gzip.open(target + ".tmp") shutil.copyfileobj(f, open(target, "wb")) elif info.get("compression") == "bz2": f = bz2.BZ2File(target + ".tmp", "r") shutil.copyfileobj(f, open(target, "wb")) f.close() os.remove(target + ".tmp") @_locked def localpath_download(self, *path, **kwargs): """ Return local path for the given domain and file. If file does not exist, download it. Additional arguments are passed to the :obj:`download` function. """ pathname = self.localpath(*path) if not os.path.exists(pathname): self.download.unwrapped(self, *path, **kwargs) return pathname def listfiles(self, *path): """List files (or folders) in local repository that have corresponding .info files. Do not list .info files.""" dir = self.localpath(*path) files = [] for root, dirs, fnms in os.walk(dir): for f in fnms: if f[-5:] == '.info' and os.path.exists(os.path.join(root, f[:-5])): try: _open_file_info(os.path.join(root, f)) files.append( path + tuple(_split_path( os.path.relpath(os.path.join(root, f[:-5]), start=dir) ))) except ValueError: pass return files def info(self, *path): """Return .info file for a file in a local repository.""" target = self.localpath(*path) return _open_file_info(target + '.info') def allinfo(self, *path): """Return all local info files in a dictionary, where keys are paths.""" files = self.listfiles(*path) dic = {} for filename in files: dic[filename] = self.info(*filename) return dic def needs_update(self, *path): """Return True if a file does not exist in the local repository, if there is a newer version on the server or if either version can not be determined.""" dt_fmt = "%Y-%m-%d %H:%M:%S" try: linfo = self.info(*path) dt_local = datetime.datetime.strptime( linfo["datetime"][:19], dt_fmt) dt_server = datetime.datetime.strptime( self.serverfiles.info(*path)["datetime"][:19], dt_fmt) return dt_server > dt_local except FileNotFoundError: return True except KeyError: return True def update(self, *path, **kwargs): """Download the corresponding file from the server if server copy was updated. """ if self.needs_update(*path): self.download(*path, **kwargs) def search(self, sstrings, **kwargs): """Search for files in the local repository where all substrings in a list are contained in at least one chosen field (tag, title, name). Return a list of tuples: first tuple element is the domain of the file, second its name.""" si = self.allinfo() return _search(si, sstrings, **kwargs) def update_all(self, *path): for fu in self.listfiles(*path): self.update(*fu) @_locked def remove(self, *path): """Remove a file of a path from local repository.""" path = self.localpath(*path) if os.path.exists(path + ".info"): try: if os.path.isdir(path): shutil.rmtree(path) elif os.path.isfile(path): os.remove(path) os.remove(path + ".info") except OSError as ex: print("Failed to delete", path, "due to:", ex) else: raise FileNotFoundError def _search(si, sstrings, case_sensitive=False, in_tag=True, in_title=True, in_name=True): found = [] for path, info in si.items(): target = "" if in_tag: target += " ".join(info.get('tags', [])) if in_title: target += info.get('title', "") if in_name: target += " ".join(path) if not case_sensitive: target = target.lower() match = True for s in sstrings: if not case_sensitive: s = s.lower() if s not in target: match = False break if match: found.append(path) return found def sizeformat(size): """ >>> sizeformat(256) '256 bytes' >>> sizeformat(1024) '1.0 KB' >>> sizeformat(1.5 * 2 ** 20) '1.5 MB' """ for unit in ['bytes', 'KB', 'MB', 'GB', 'TB']: if size < 1024.0: if unit == "bytes": return "%1.0f %s" % (size, unit) else: return "%3.1f %s" % (size, unit) size /= 1024.0 return "%.1f PB" % size if __name__ == '__main__': sf = ServerFiles() lf = LocalFiles() info = sf.allinfo() print(os.getcwd()) with open("__INFO__.json", "wt") as fo: json.dump(list(info.items()), fo) serverfiles-0.3.0/setup.py000066400000000000000000000010761337576242600156000ustar00rootroot00000000000000#!/usr/bin/env python from setuptools import setup if __name__ == '__main__': setup( name='serverfiles', description="An utility that accesses files on a HTTP server and stores them locally for reuse.", author='Bioinformatics Laboratory, FRI UL', author_email='marko.toplak@fri.uni-lj.si', packages=["serverfiles"], install_requires=[ 'requests>=2.11.1', ], version='0.3.0', zip_safe=False, url="https://github.com/biolab/serverfiles", test_suite="tests.suite" ) serverfiles-0.3.0/tests/000077500000000000000000000000001337576242600152245ustar00rootroot00000000000000serverfiles-0.3.0/tests/__init__.py000066400000000000000000000007461337576242600173440ustar00rootroot00000000000000import os import unittest def suite(loader=None, pattern='test*.py'): test_dir = os.path.dirname(__file__) if loader is None: loader = unittest.TestLoader() if pattern is None: pattern = 'test*.py' all_tests = [ loader.discover(test_dir, pattern, "."), ] return unittest.TestSuite(all_tests) def load_tests(loader, tests, pattern): return suite(loader, pattern) if __name__ == '__main__': unittest.main(defaultTest='suite') serverfiles-0.3.0/tests/test_serverfiles.py000066400000000000000000000155001337576242600211670ustar00rootroot00000000000000# Test methods with long descriptive names can omit docstrings # pylint: disable=missing-docstring import unittest import multiprocessing import os import shutil try: from http.server import HTTPServer, SimpleHTTPRequestHandler except ImportError: from SimpleHTTPServer import SimpleHTTPRequestHandler from BaseHTTPServer import HTTPServer import tempfile import gzip import bz2 import tarfile import sys import time import serverfiles try: FileNotFoundError except: FileNotFoundError = IOError DATETIMETEST = "2013-07-03 11:39:07.381031" def create(name, contents): with open(os.path.join(*name), "wt") as f: f.write(contents) def server(path, info): os.chdir(path) os.mkdir("domain1") create(("domain1", "__DUMMY"), "something to ignore") create(("domain1", "withoutinfo"), "without info") create(("domain1", "withinfo"), "with info") create(("domain1", "withinfo.info"), '{"datetime": "%s", "tags": "search"}' % DATETIMETEST) os.mkdir("comp") with gzip.open(os.path.join("comp", "gz"), "wt") as f: f.write("compress") create(("comp", "gz.info"), '{"compression": "gz"}') with bz2.BZ2File(os.path.join("comp", "bz2"), "w") as f: #Python 2.7 compatibility f.write("compress".encode("ascii")) create(("comp", "bz2.info"), '{"compression": "bz2"}') create(("intar",), "compress") with tarfile.open(os.path.join("comp", "tar.gz"), "w") as tar: tar.add(os.path.join("intar")) os.remove("intar") create(("comp", "tar.gz.info"), '{"compression": "tar.gz"}') if info: create(("__INFO__",), '''[[["comp", "gz"], {"compression": "gz"}], [["comp", "bz2"], {"compression": "bz2"}], [["domain1", "withoutinfo"], {}], [["comp", "tar.gz"], {"compression": "tar.gz"}], [["domain1", "withinfo"], {"tags": "search", "datetime": "2013-07-03 11:39:07.381031"}]]''') # http server outputs a line for every connection sys.stderr = open(os.devnull, "w") httpd = HTTPServer(("", 12345), SimpleHTTPRequestHandler) httpd.serve_forever() class TestServerFiles(unittest.TestCase): @classmethod def setUpClass(cls): cls.pathserver = tempfile.mkdtemp() cls.http = multiprocessing.Process(target=server, args=[cls.pathserver, False]) cls.http.daemon = True cls.http.start() @classmethod def tearDownClass(cls): cls.http.terminate() shutil.rmtree(cls.pathserver) def setUp(self): self.sf = serverfiles.ServerFiles(server="http://localhost:12345/") t = time.time() while time.time() - t < 1.: #wait for at most 1 second for server process to come online try: self.sf.info("domain1", "withinfo") break except: pass self.path = tempfile.mkdtemp() self.lf = serverfiles.LocalFiles(path=self.path, serverfiles=self.sf) def tearDown(self): shutil.rmtree(self.path) def test_callback(self): class CB: run = 0 def __call__(self): self.run += 1 cb = CB() self.lf.download("domain1", "withinfo", callback=cb) self.assertEqual(cb.run, 100) def test_listdir_server(self): ldomain = self.sf.listfiles("domain1") self.assertEqual(set(ldomain), set([ ("domain1", "withinfo"), ("domain1", "withoutinfo")])) lall = self.sf.listfiles() self.assertGreaterEqual(set(lall), set(ldomain)) def test_download(self): self.lf.download("domain1", "withinfo") self.lf.download("domain1", "withoutinfo") #file exists on drive self.assertTrue(os.path.exists(os.path.join(self.path, "domain1", "withinfo"))) #downloaded all files llist = self.lf.listfiles("domain1") slist = self.sf.listfiles("domain1") self.assertEqual(set(llist), set(slist)) def test_compressed(self): self.lf.download("comp", "gz") self.lf.download("comp", "bz2") def read(fname): with open(fname, "rt") as f: return f.read() self.assertEqual(read(self.lf.localpath("comp", "gz")), read(self.lf.localpath("comp", "bz2"))) self.lf.download("comp", "tar.gz") self.assertTrue(os.path.isdir(self.lf.localpath("comp", "tar.gz"))) self.assertEqual(read(self.lf.localpath("comp", "tar.gz", "intar")), read(self.lf.localpath("comp", "bz2"))) self.lf.remove("comp", "tar.gz") self.assertFalse(os.path.exists(self.lf.localpath("comp", "tar.gz"))) self.assertFalse(os.path.exists(self.lf.localpath("comp", "tar.gz.info"))) def test_info(self): self.lf.download("domain1", "withinfo") self.lf.download("domain1", "withoutinfo") self.assertEqual(self.lf.info("domain1", "withinfo")["datetime"], DATETIMETEST) self.assertEqual(self.sf.info("domain1", "withinfo")["datetime"], DATETIMETEST) self.assertEqual(self.lf.allinfo()[("domain1", "withinfo")]["datetime"], DATETIMETEST) self.assertEqual(self.sf.allinfo()[("domain1", "withinfo")]["datetime"], DATETIMETEST) self.assertEqual(self.sf.allinfo("domain1"), self.lf.allinfo("domain1")) def test_remove(self): lpath = self.lf.localpath_download("domain1", "withoutinfo") self.assertTrue(os.path.exists(lpath)) self.assertTrue(os.path.exists(lpath + ".info")) self.lf.remove("domain1", "withoutinfo") self.assertFalse(os.path.exists(lpath)) self.assertFalse(os.path.exists(lpath + ".info")) self.assertRaises(FileNotFoundError, lambda: self.lf.remove("domain1", "wrong file")) def test_update(self): self.lf.update_all() self.assertTrue(self.lf.needs_update("domain1", "withinfo")) self.lf.update("domain1", "withinfo") self.assertFalse(self.lf.needs_update("domain1", "withinfo")) self.lf.update("domain1", "withoutinfo") self.assertTrue(self.lf.needs_update("domain1", "withoutinfo")) self.lf.update_all() def test_search(self): self.lf.download("domain1", "withinfo") self.lf.download("domain1", "withoutinfo") self.assertEqual(self.lf.search("without"), [("domain1", "withoutinfo")]) self.assertEqual(len(self.lf.search("domain1")), 2) self.assertEqual(len(self.sf.search("domain1")), 2) self.assertEqual(self.sf.search("search"), [("domain1", "withinfo")]) class TestServerFilesInfo(TestServerFiles): """ Repeats the same tests with __INFO__ file. """ @classmethod def setUpClass(cls): cls.pathserver = tempfile.mkdtemp() cls.http = multiprocessing.Process(target=server, args=[cls.pathserver, True]) cls.http.daemon = True cls.http.start()