Where can I get"
"some?
There are many variations of passages of Lorem Ipsum"
"available, but the majority have suffered alteration in some form, by"
"injected humour, or randomised words which don't look even slightly"
"believable. If you are going to use a passage of Lorem Ipsum, you need"
"to be sure there isn't anything embarrassing hidden in the middle of"
"text. All the Lorem Ipsum generators on the Internet tend to repeat"
"predefined chunks as necessary, making this the first true generator"
"on the Internet. It uses a dictionary of over 200 Latin words,"
"combined with a handful of model sentence structures, to generate"
"Lorem Ipsum which looks reasonable. The generated Lorem Ipsum is"
"therefore always free from repetition, injected humour, or"
"non-characteristic words etc.
"
)
@pytest.fixture(scope="module")
def lipsum_item(lipsum):
    """Module-scoped HTML ``StaticItem`` stored at ``HOME_PATH`` with the lipsum text."""
    home_item = StaticItem(path=HOME_PATH, content=lipsum, mimetype="text/html")
    return home_item
def test_imports():
    """Every public symbol of ``libzim.writer`` must be reachable and truthy."""
    for symbol in (
        "Compression",
        "Blob",
        "Item",
        "ContentProvider",
        "FileProvider",
        "StringProvider",
        "Creator",
    ):
        assert getattr(libzim.writer, symbol)
def test_creator_filename(fpath):
    """The creator, then the resulting archive, both report ``fpath`` as filename."""
    with Creator(fpath) as creator:
        assert creator.filename == fpath
    # the ZIM is only readable once the creator context has been closed
    archive = Archive(fpath)
    assert archive.filename == fpath
def test_creator_repr(fpath):
    """The string form of a creator mentions the path it writes to."""
    with Creator(fpath) as creator:
        rendered = str(creator)
        assert str(fpath) in rendered
def get_creator_output(fpath, verbose):
    """Run a no-op Creator in a child interpreter and return what it printed.

    The child opens a ``Creator`` on ``fpath`` with ``config_verbose(verbose)``
    and closes it right away. stderr is merged into stdout.

    Returns the child's combined output as ``str``; asserts that the child
    exited with return code 0.
    """
    # The script lives at column 0 so the child sees valid top-level code; the
    # body of its `with` block must stay indented. Placeholders are filled in
    # with str.replace, which leaves any other braces in the path untouched.
    code = """
from libzim.writer import Creator
with Creator(r"{fpath}").config_verbose({verbose}) as creator:
    pass
""".replace(
        "{fpath}", str(fpath)
    ).replace(
        "{verbose}", str(verbose)
    )
    ps = subprocess.run(
        [sys.executable, "-c", code],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        check=False,
    )
    # show the child's output on failure instead of a bare return-code mismatch
    assert ps.returncode == 0, ps.stdout
    return ps.stdout
@pytest.mark.parametrize("verbose", [True, False])
def test_creator_verbose(fpath, verbose):
    """Verbose mode prints timing lines; quiet mode prints exactly two lines."""
    output = get_creator_output(fpath, verbose).strip()
    line_count = len(output.splitlines())
    if not verbose:
        assert line_count == 2
        return
    assert "T:" in output
    assert line_count >= 5
def test_creator_compression(fpath, lipsum_item):
    """Create ZIM files with every available compression algorithm.

    Each algorithm is configured once by name and once by enum value; both
    files must have the same size. Sizes must differ between algorithms, and a
    creator left unconfigured must match the "zstd" size.
    """
    filesizes = {}
    for comp in libzim.writer.Compression.__members__:
        # configured with the member name (str)
        fpath_str = fpath.with_name(f"{fpath.name}_{comp}_str.zim")
        with Creator(fpath_str).config_compression(comp) as creator:
            creator.add_item(lipsum_item)

        # configured with the enum member itself
        fpath_val = fpath.with_name(f"{fpath.name}_{comp}_val.zim")
        comp_val = getattr(libzim.writer.Compression, comp)
        with Creator(fpath_val).config_compression(comp_val) as creator:
            creator.add_item(lipsum_item)

        assert Archive(fpath_str).checksum
        size_from_name = Archive(fpath_str).filesize
        assert size_from_name == Archive(fpath_val).filesize
        filesizes[comp] = Archive(fpath_str).filesize

    for first, second in itertools.combinations(filesizes, 2):
        assert filesizes[first] != filesizes[second]

    # now don't specify
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)

    # default should be zstd
    assert Archive(fpath).filesize == filesizes["zstd"]
@pytest.mark.parametrize("cluster_size", [0, 128, 512, 8196, 10240])
def test_creator_clustersize(fpath, cluster_size, lipsum_item):
    """A ZIM can be created with an arbitrary min-cluster-size."""
    configured = Creator(fpath).config_clustersize(cluster_size)
    with configured as creator:
        creator.add_item(lipsum_item)
@pytest.mark.parametrize(
    "indexing, language, expected",
    [
        (False, "a", 0),
        (False, "eng", 0),
        (True, "eng", 1),
        (True, "en", 1),
        (True, "fra", 1),
        (True, "fr", 1),
    ],
)
def test_creator_indexing(fpath, lipsum_item, indexing, language, expected):
    """The fulltext index exists only when requested and then finds "standard"."""
    with Creator(fpath).config_indexing(indexing, language) as creator:
        creator.add_item(lipsum_item)

    archive = Archive(fpath)
    assert archive.has_fulltext_index == indexing
    if not indexing:
        # the search is only attempted when an index was requested
        return

    query = Query().set_query("standard")
    searcher = Searcher(archive)
    results = searcher.search(query)
    assert results.getEstimatedMatches() == expected
@pytest.mark.parametrize("nb_workers", [1, 2, 3, 5])
def test_creator_nbworkers(fpath, lipsum_item, nb_workers):
    """A ZIM can be created with various worker counts."""
    configured = Creator(fpath).config_nbworkers(nb_workers)
    with configured as creator:
        creator.add_item(lipsum_item)
def test_creator_combine_config(fpath, lipsum_item):
    """All config_* setters can be chained before the creator is started."""
    configured = (
        Creator(fpath)
        .config_verbose(True)
        .config_compression("zstd")
        .config_clustersize(1024)
        .config_indexing(True, "eng")
        .config_nbworkers(2)
    )
    with configured as creator:
        creator.add_item(lipsum_item)
@pytest.mark.parametrize(
    "name, args",
    [
        ("verbose", (True,)),
        ("compression", ("zstd",)),
        ("clustersize", (1024,)),
        ("indexing", (True, "eng")),
        ("nbworkers", (2,)),
    ],
)
def test_creator_config_poststart(fpath, name, args):
    """Calling any config_* setter on a started creator raises RuntimeError."""
    with Creator(fpath) as creator:
        setter = getattr(creator, f"config_{name}")
        with pytest.raises(RuntimeError, match="started"):
            setter(*args)
def test_creator_nocontext(fpath, lipsum_item):
    """Drive the creator linearly through explicit __enter__/__exit__ calls."""
    exc_type = exc_val = exc_tb = None

    # first pass: regular exit without any pending exception
    creator = Creator(fpath)
    creator.__enter__()
    creator.add_metadata("Name", "name")
    creator.add_item(lipsum_item)
    with pytest.raises(RuntimeError):
        creator.config_verbose(True)
    creator.__exit__(exc_type, exc_val, exc_tb)

    # now with an exception
    creator = Creator(fpath)
    creator.__enter__()
    creator.add_item(lipsum_item)
    try:
        creator.add_redirection("A", HOME_PATH)  # pyright: ignore [reportCallIssue]
    except Exception as exc:
        exc_type, exc_val, exc_tb = type(exc), exc, exc.__traceback__
        # re-raise inside pytest.raises to check the exception is a TypeError
        with pytest.raises(TypeError):
            raise
    creator.__exit__(exc_type, exc_val, exc_tb)
def test_creator_subclass(fpath, lipsum_item):
    """A Creator subclass may extend __init__ and __exit__."""

    class ACreator(Creator):
        """Creator flipping ``ready`` to True once its context has exited."""

        def __init__(self, fpath, tata):
            super().__init__(filename=fpath)
            self.ready = False

        def __exit__(self, exc_type, exc_val, exc_tb):
            super().__exit__(exc_type, exc_val, exc_tb)
            self.ready = True

    custom = ACreator(fpath, tata=2)
    assert custom.ready is False
    with custom:
        assert custom.ready is False
        custom.add_item(lipsum_item)
    assert custom.ready is True
def test_creator_mainpath(fpath, lipsum_item):
    """A main entry exists only when set_mainpath() was called."""
    main_path = HOME_PATH

    # with a main path: "mainPage" resolves to the home item
    with Creator(fpath).set_mainpath(main_path) as creator:
        creator.add_item(lipsum_item)
    archive = Archive(fpath)
    assert archive.has_main_entry is True
    assert archive.main_entry.path == "mainPage"
    assert archive.main_entry.get_item().path == main_path

    # drop the reader and remove the file before re-creating it at the same path
    del archive
    fpath.unlink()

    # without a main path: reading main_entry raises
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
    archive = Archive(fpath)
    assert archive.has_main_entry is False
    with pytest.raises(RuntimeError):
        assert archive.main_entry
def test_creator_illustration(fpath, favicon_data):
    """Illustrations added at 48 and 96 can be read back from the archive."""
    with Creator(fpath) as creator:
        for size in (48, 96):
            creator.add_illustration(size, favicon_data)

    archive = Archive(fpath)
    assert archive.has_illustration() is True
    assert archive.has_illustration(48) is True
    assert archive.has_illustration(96) is True
    assert archive.has_illustration(128) is False
    default_item = archive.get_illustration_item()
    assert bytes(default_item.content) == favicon_data
    large_item = archive.get_illustration_item(96)
    assert bytes(large_item.content) == favicon_data
    assert archive.get_illustration_sizes() == {48, 96}
def test_creator_additem(fpath, lipsum_item):
    """add_item() requires a started creator and exactly one valid item."""
    # ensure we can't add if not started
    idle = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        idle.add_item(lipsum_item)
    del idle

    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        with pytest.raises(TypeError, match="must not be None"):
            creator.add_item(None)  # pyright: ignore [reportCallIssue, reportArgumentType]
        with pytest.raises(RuntimeError):
            creator.add_item("hello")  # pyright: ignore [reportCallIssue, reportArgumentType]
        with pytest.raises(TypeError, match="takes exactly 1 positional argument"):
            creator.add_item(mimetype="text/html")  # pyright: ignore [reportCallIssue]
def test_creator_metadata(fpath, lipsum_item):
    """Metadata added as str (and a date for "Date") round-trips through the ZIM."""
    metadata = {
        # kiwix-mandatory
        "Name": "wikipedia_fr_football",
        "Title": "English Wikipedia",
        "Creator": "English speaking Wikipedia contributors",
        "Publisher": "Wikipedia user Foobar",
        "Date": "2009-11-21",
        "Description": "All articles (without images) from the english Wikipedia",
        "Language": "eng",
        # optional
        "Longdescription": (
            "This ZIM file contains all articles (without images) "
            "from the english Wikipedia by 2009-11-10."
            " The topics are ..."
        ),
        "Licence": "CC-BY",
        "Tags": "wikipedia;_category:wikipedia;_pictures:no;"
        "_videos:no;_details:yes;_ftindex:yes",
        "Flavour": "nopic",
        "Source": "https://en.wikipedia.org/",
        "Scraper": "sotoki 1.2.3",
    }

    # ensure we can't add if not started
    idle = Creator(fpath)
    first_key = next(iter(metadata))
    with pytest.raises(RuntimeError, match="not started"):
        idle.add_metadata(first_key, metadata[first_key])
    del idle

    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        for name, value in metadata.items():
            if name != "Date":
                creator.add_metadata(name, value)
        # "Date" is passed as a datetime.date instead of a str
        date_parts = map(int, metadata.get("Date", "").split("-"))
        creator.add_metadata("Date", datetime.date(*date_parts))

    archive = Archive(fpath)
    for name, value in metadata.items():
        stored = archive.get_metadata(name).decode("UTF-8")
        assert stored == value
def test_creator_metadata_overwrite(fpath, lipsum_item, favicon_data):
    """Re-adding an entry (item, metadata, redirect, illustration) raises.

    The first value written for a metadata key is the one kept in the ZIM.
    """
    duplicate = {"match": "Impossible to add"}
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        with pytest.raises(RuntimeError, **duplicate):
            creator.add_item(lipsum_item)

        creator.add_metadata("Key", "first")
        with pytest.raises(RuntimeError, **duplicate):
            creator.add_metadata("Key", "second")

        creator.add_redirection("home", lipsum_item.get_path(), "Home", {})
        with pytest.raises(RuntimeError, **duplicate):
            creator.add_redirection("home", lipsum_item.get_path(), "Home again", {})

        creator.add_illustration(48, favicon_data)
        # this currently segfaults but it should not
        with pytest.raises(RuntimeError, **duplicate):
            creator.add_illustration(48, favicon_data)

    archive = Archive(fpath)
    assert archive.get_metadata("Key").decode("UTF-8") == "first"
def test_creator_redirection(fpath, lipsum_item):
    """Redirections need a started creator, resolve to their target and are suggested.

    Two redirects ("home" and "accueil") point at ``HOME_PATH``. Both must be
    present in the archive, resolve to the home item, and be returned by the
    suggestion searcher when looking up their titles.
    """
    # ensure we can't add if not started
    c = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        c.add_redirection("home", "hello", HOME_PATH, {Hint.FRONT_ARTICLE: True})
    del c

    with Creator(fpath) as c:
        c.add_item(lipsum_item)
        c.add_redirection("home", "hello", HOME_PATH, {Hint.FRONT_ARTICLE: True})
        c.add_redirection("accueil", "bonjour", HOME_PATH, {Hint.FRONT_ARTICLE: True})

    zim = Archive(fpath)
    assert zim.entry_count == 3
    assert zim.has_entry_by_path("home") is True
    assert zim.has_entry_by_path("accueil") is True
    assert zim.get_entry_by_path("home").is_redirect
    assert (
        zim.get_entry_by_path("home").get_redirect_entry().path
        == zim.get_entry_by_path(HOME_PATH).path
    )
    assert zim.get_entry_by_path("accueil").get_item().path == HOME_PATH

    # suggestions
    sugg_searcher = SuggestionSearcher(zim)
    sugg_hello = sugg_searcher.suggest("hello")
    assert "home" in list(sugg_hello.getResults(0, sugg_hello.getEstimatedMatches()))
    sugg_bonjour = sugg_searcher.suggest("bonjour")
    # size the result window from this suggestion's own match count
    assert "accueil" in list(
        sugg_bonjour.getResults(0, sugg_bonjour.getEstimatedMatches())
    )
def test_creator_alias(fpath, lipsum_item):
    """Aliases need a started creator and an existing target; they are not redirects.

    An alias to ``HOME_PATH`` must yield a second, non-redirect entry with the
    same content. Aliasing a missing target raises and adds nothing.
    """
    # ensure we can't add if not started (exercise add_alias itself)
    c = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        c.add_alias("home", "hello", HOME_PATH, {Hint.FRONT_ARTICLE: True})
    del c

    with Creator(fpath) as c:
        c.add_item(lipsum_item)
        c.add_alias("home", "hello", HOME_PATH, {Hint.FRONT_ARTICLE: True})
        with pytest.raises(RuntimeError, match="doesn't exist"):
            c.add_alias(
                "accueil",
                "bonjour",
                HOME_PATH + "_non_existent",
                {Hint.FRONT_ARTICLE: True},
            )

    zim = Archive(fpath)
    assert zim.entry_count == 2
    assert zim.has_entry_by_path("home") is True
    assert zim.has_entry_by_path("accueil") is False
    assert not zim.get_entry_by_path("home").is_redirect
    assert (
        zim.get_entry_by_path("home").get_item().content
        == zim.get_entry_by_path(HOME_PATH).get_item().content
    )
def test_item_notimplemented(fpath, lipsum_item):
    """Bare Item getters raise NotImplementedError; StaticItem has a useful str()."""
    bare_item = Item()
    for member in ("path", "title", "mimetype", "contentprovider"):
        getter = getattr(bare_item, f"get_{member}")
        with pytest.raises(NotImplementedError):
            getter()

    rendered = str(lipsum_item)
    assert HOME_PATH in rendered
    assert lipsum_item.get_title() in str(lipsum_item)
def test_contentprovider(fpath):
    """Bare ContentProvider methods raise NotImplementedError."""
    provider = ContentProvider()
    for member in ("get_size", "gen_blob"):
        method = getattr(provider, member)
        with pytest.raises(NotImplementedError):
            method()
def test_fileprovider(fpath, lipsum):
    """An item backed by a file on disk is stored verbatim and can be streamed."""
    lipsum_fpath = fpath.with_name("lipsum.html")
    # ten copies of the lipsum text
    with open(lipsum_fpath, "w") as fh:
        fh.writelines([lipsum] * 10)

    item = StaticItem(path=HOME_PATH, filepath=lipsum_fpath, mimetype="text/html")
    assert HOME_PATH in str(item)
    assert item.get_title() in str(item)

    with Creator(fpath) as creator:
        creator.add_item(item)

    archive = Archive(fpath)
    with open(lipsum_fpath, "rb") as fh:
        assert bytes(archive.get_entry_by_path(HOME_PATH).get_item().content) == fh.read()

    # test feed streaming: feed() until an empty blob is returned
    provider = item.get_contentprovider()
    while (blob := provider.feed()).size():
        assert isinstance(blob, Blob)
def test_stringprovider(fpath, lipsum):
    """An item backed by an in-memory string is stored as UTF-8 and can be streamed."""
    item = StaticItem(path=HOME_PATH, content=lipsum, mimetype="text/html")
    assert HOME_PATH in str(item)
    assert item.get_title() in str(item)

    with Creator(fpath) as creator:
        creator.add_item(item)

    archive = Archive(fpath)
    stored = bytes(archive.get_entry_by_path(HOME_PATH).get_item().content)
    assert stored == lipsum.encode("UTF-8")

    # test feed streaming: feed() until an empty blob is returned
    provider = item.get_contentprovider()
    while (blob := provider.feed()).size():
        assert isinstance(blob, Blob)
def test_item_contentprovider_none(fpath):
    """An item whose get_contentprovider() gives no usable provider is rejected."""

    class AnItem:
        """Duck-typed item returning empty values everywhere."""

        def get_path(self):
            return ""

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

        def get_contentprovider(self):
            return ""

        def get_hints(self):
            return {}

    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match="ContentProvider is None"):
            creator.add_item(AnItem())  # pyright: ignore [reportArgumentType]
def test_missing_contentprovider(fpath):
    """An item lacking get_contentprovider() is rejected with a RuntimeError."""

    class AnItem:
        """Duck-typed item with no get_contentprovider() method."""

        def get_path(self):
            return ""

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

        def get_hints(self):
            return {}

    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match="has no attribute"):
            creator.add_item(AnItem())  # pyright: ignore [reportArgumentType]
def test_missing_hints(fpath):
    """Items without get_hints(), or with unimplemented getters, are rejected."""

    class AnItem:
        """Duck-typed item with no get_hints() method."""

        def get_path(self):
            return ""

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match="has no attribute 'get_hints'"):
            creator.add_item(AnItem())  # pyright: ignore [reportArgumentType]

        # a bare writer Item has the method names but none is implemented
        with pytest.raises(RuntimeError, match="must be implemented"):
            creator.add_item(libzim.writer.Item())
def test_nondict_hints(fpath):
    """Hints that are not a dict are rejected for items and redirections."""
    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match="has no attribute 'items'"):
            creator.add_item(StaticItem(path="1", title="", hints=1))

        with pytest.raises(TypeError, match="hints"):
            creator.add_redirection(
                "a", "", "b", hints=1  # pyright: ignore [reportArgumentType]
            )
def test_hints_values(fpath):
    """Exercise accepted, ignored and invalid hint keys and values."""
    with Creator(fpath) as creator:
        # correct values
        creator.add_item(StaticItem(path="0", title="", hints={}))
        valid_hints = {Hint.FRONT_ARTICLE: True, Hint.COMPRESS: False}
        creator.add_item(StaticItem(path="1", title="", hints=valid_hints))
        # non-expected Hints are ignored
        creator.add_item(StaticItem(path="2", title="", hints={"hello": "world"}))
        # Hint values are casted to bool
        creator.add_item(
            StaticItem(path="3", title="", hints={Hint.FRONT_ARTICLE: "world"})
        )
        creator.add_redirection(
            path="4", title="", targetPath="0", hints={Hint.COMPRESS: True}
        )
        # filtered-out values
        for path, hints in (
            ("5", {5: True}),
            ("6", {"yolo": True}),
            ("7", {"FRONT_ARTICLE": True}),
            ("8", {0: True}),
        ):
            creator.add_item(StaticItem(path=path, title="", hints=hints))

        # non-existent Hint
        with pytest.raises(AttributeError, match="YOLO"):
            creator.add_item(
                StaticItem(
                    path="0",
                    title="",
                    hints={
                        Hint.YOLO: True  # pyright: ignore [reportAttributeAccessIssue]
                    },
                )
            )

        with pytest.raises(AttributeError, match="YOLO"):
            creator.add_redirection(  # pyright: ignore [reportCallIssue]
                path="5",
                title="",
                targetPath="0",
                hints={Hint.YOLO: True},  # pyright: ignore [reportAttributeAccessIssue]
            )
@pytest.mark.parametrize(
    "indexData, customContent, search_expected",
    [
        (None, "", [("standard", 1), ("home", 0), ("computer", 0)]),
        (False, "", [("standard", 1), ("home", 0), ("computer", 0)]),
        (True, "home", [("standard", 1), ("home", 1), ("computer", 0)]),
        (True, "computer", [("standard", 1), ("home", 0), ("computer", 1)]),
        (True, "standard", [("standard", 2), ("home", 0), ("computer", 0)]),
    ],
)
def test_custom_indexdata(
    fpath, lipsum_item, lipsum, indexData, customContent, search_expected
):
    """Custom IndexData controls what the fulltext index records for an item.

    ``indexData`` is None (get_indexdata returns None) or the value returned
    by ``has_indexdata()``; ``customContent`` is the text exposed for indexing.
    """

    class CustomIndexData(IndexData):
        """IndexData exposing the parametrized flag and content."""

        def has_indexdata(self):
            return indexData

        def get_title(self):
            return ""

        def get_content(self):
            return customContent

        def get_keywords(self):
            return ""

        def get_wordcount(self):
            return 1

    item = StaticItem(path=HOME_PATH + "custom", content=lipsum, mimetype="text/html")
    if indexData is None:
        item.get_indexdata = lambda: None
    else:
        item.get_indexdata = CustomIndexData

    with Creator(fpath).config_indexing(True, "eng") as creator:
        creator.add_item(lipsum_item)
        creator.add_item(item)

    archive = Archive(fpath)
    searcher = Searcher(archive)
    for search_query, expected in search_expected:
        results = searcher.search(Query().set_query(search_query))
        assert results.getEstimatedMatches() == expected
def test_indexdata_interface():
    """Default IndexData: no index data, unimplemented getters, no geoposition."""
    default_id = IndexData()
    assert default_id.has_indexdata() is False

    for method in ("title", "content", "keywords", "wordcount"):
        getter = getattr(default_id, f"get_{method}")
        with pytest.raises(NotImplementedError):
            getter()

    assert default_id.get_geoposition() is None
def test_exc_in_indexdata(fpath, lipsum):
    """An exception raised inside IndexData surfaces as a RuntimeError."""

    class CustomIndexData(IndexData):
        """IndexData whose has_indexdata() always fails."""

        def has_indexdata(self):
            raise IndexError

    item = StaticItem(path=HOME_PATH + "custom", content=lipsum, mimetype="text/html")
    item.get_indexdata = CustomIndexData

    # the raises block wraps the whole creator context, not just add_item()
    with pytest.raises(RuntimeError, match="IndexError"):
        with Creator(fpath).config_indexing(True, "eng") as creator:
            creator.add_item(item)
def test_reimpfeed(fpath):
    """A plain-Python content provider with its own feed() can back an item."""

    class AContentProvider:
        """Provider yielding a one-byte blob once, then empty blobs."""

        def __init__(self):
            self.called = False

        def get_size(self):
            return 1

        def feed(self):
            if not self.called:
                self.called = True
                return Blob("1")
            return Blob("")

    class AnItem(Item):
        """Item at path "-" served by AContentProvider."""

        def get_path(self):
            return "-"

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

        def get_hints(self):
            return {}

        def get_contentprovider(self):
            return AContentProvider()

    with Creator(fpath) as creator:
        creator.add_item(AnItem())

    # the provider is also usable directly from Python
    provider = AnItem().get_contentprovider()
    assert provider.get_size() == 1
    assert provider.feed().size() == 1
def test_virtualmethods_int_exc(fpath):
    """A provider whose get_size() returns a non-integer makes add_item() fail."""

    class AContentProvider:
        """Provider reporting its size as a str instead of an int."""

        def get_size(self):
            return ""

        def feed(self):
            return Blob("")

    class AnItem(Item):
        """Item served by the faulty provider above."""

        def get_path(self):
            return ""

        def get_title(self):
            return ""

        def get_mimetype(self):
            return ""

        def get_hints(self):
            return {}

        def get_contentprovider(self):
            return AContentProvider()

    with Creator(fpath) as creator:
        with pytest.raises(RuntimeError, match="TypeError: an integer is required"):
            creator.add_item(AnItem())
def test_creator_badfilename(tmpdir):
    """Creator() raises IOError for unwritable or non-existent target folders."""
    on_windows = platform.system() == "Windows"
    # os.getuid() is only evaluated when not on Windows
    if not on_windows and os.getuid() != 0:
        # lack of perm
        with pytest.raises(IOError):
            Creator(pathlib.Path("/root/test.zim"))

    # forward slash points to non-existing folder
    missing_parent = tmpdir / "test/test.zim"
    with pytest.raises(IOError):
        Creator(missing_parent)
def test_accented_search_from_libzim(fpath):
    """copy of libzim accented search test
    https://github.com/openzim/libzim/blob/main/test/search.cpp#L290 (88543b00)

    Both the plain and the accented query must match the two articles."""
    articles = (
        ("path0", "Test Article0", "This is a tèst articlé. temp0"),
        ("path1", "Test Article1", "This is another test article. For article1."),
    )
    with Creator(fpath).config_verbose(True).config_indexing(True, "eng") as creator:
        for path, title, content in articles:
            creator.add_item(
                StaticItem(
                    path=path, title=title, content=content, mimetype="text/html"
                )
            )

    zim = Archive(fpath)
    assert zim.entry_count == 2
    assert zim.article_count == 2
    assert zim.all_entry_count == 7

    # ASCII query first, accented query second; each uses a fresh Searcher
    for query_text in ("test article", "test àrticlé"):
        query = Query().set_query(query_text)
        searcher = Searcher(zim)
        search = searcher.search(query)
        assert search.getEstimatedMatches() == zim.article_count
        assert list(search.getResults(0, zim.article_count)) == ["path0", "path1"]
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1728999045.0
libzim-3.6.0/tests/test_libzim_reader.py 0000644 0001751 0000177 00000044063 14703467205 020015 0 ustar 00runner docker #!/usr/bin/env python
import gc
import os
import pathlib
import uuid
from urllib.request import urlretrieve
import pytest
import libzim.writer # pyright: ignore [reportMissingModuleSource]
from libzim.reader import Archive # pyright: ignore [reportMissingModuleSource]
from libzim.search import Query, Searcher # pyright: ignore [reportMissingModuleSource]
from libzim.suggestion import ( # pyright: ignore [reportMissingModuleSource]
SuggestionSearcher,
)
# Expected data for the test ZIMs (see `all_zims`), keyed by file name.
# Every record carries the same set of keys so that parametrized tests never
# fall back to the "MISSING-VALUE" placeholder built in `get_pytest_param`.
# A None value means "not checked / not applicable" for that file.
ZIMS_DATA = {
    "blank.zim": {
        "filename": "blank.zim",
        "filesize": 2197,
        "new_ns": True,
        "mutlipart": False,
        "zim_uuid": None,
        "metadata_keys": ["Counter"],
        "test_metadata": None,
        "test_metadata_value": None,
        "has_main_entry": False,
        "has_favicon_entry": False,
        "has_fulltext_index": False,
        "has_title_index": False,
        "has_checksum": True,
        "checksum": None,
        "is_valid": True,
        "entry_count": 0,
        "all_entry_count": 2,
        "article_count": 0,
        "media_count": 0,
        "suggestion_string": None,
        "suggestion_count": 0,
        "suggestion_result": [],
        "search_string": None,
        "search_count": 0,
        "search_result": [],
        "test_path": None,
        "test_title": None,
        "test_mimetype": None,
        "test_size": None,
        "test_content_includes": None,
        "test_redirect": None,
        "test_redirect_to": None,
    },
    "zimfile.zim": {
        "filename": "zimfile.zim",
        "filesize": 569304,
        "new_ns": False,
        "mutlipart": False,
        "zim_uuid": "6f1d19d0633f087bfb557ac324ff9baf",
        "metadata_keys": [
            "Counter",
            "Creator",
            "Date",
            "Description",
            "Flavour",
            "Language",
            "Name",
            "Publisher",
            "Scraper",
            "Tags",
            "Title",
        ],
        "test_metadata": "Name",
        "test_metadata_value": "wikipedia_en_ray_charles",
        "has_main_entry": True,
        "has_favicon_entry": True,
        "has_fulltext_index": True,
        "has_title_index": True,
        "has_checksum": True,
        "checksum": None,
        "is_valid": True,
        "entry_count": 371,
        "all_entry_count": 371,
        "article_count": 129,
        "media_count": 45,
        "suggestion_string": "lucky",
        "suggestion_count": 1,
        "suggestion_result": ["A/That_Lucky_Old_Sun"],
        "search_string": "lucky",
        "search_count": 1,
        "search_result": ["A/That_Lucky_Old_Sun"],
        "test_path": "A/A_Song_for_You",
        "test_title": "A Song for You",
        "test_mimetype": "text/html",
        "test_size": 7461,
        "test_content_includes": "which was released in 1970 on Shelter Records",
        "test_redirect": "A/What_I_Say",
        "test_redirect_to": "A/What'd_I_Say",
    },
    "example.zim": {
        "filename": "example.zim",
        "filesize": 259145,
        "new_ns": True,
        "mutlipart": False,
        "zim_uuid": "5dc0b3af5df20925f0cad2bf75e78af6",
        "metadata_keys": [
            "Counter",
            "Creator",
            "Date",
            "Description",
            "Language",
            "Publisher",
            "Scraper",
            "Tags",
            "Title",
        ],
        "test_metadata": "Title",
        "test_metadata_value": "Wikibooks",
        "has_main_entry": True,
        "has_favicon_entry": False,
        "has_fulltext_index": True,
        "has_title_index": True,
        "has_checksum": True,
        "checksum": "abcd818c87079cb29282282b47ee46ec",
        "is_valid": True,
        "entry_count": 60,
        "all_entry_count": 75,
        "article_count": 0,
        "media_count": 22,
        "suggestion_string": "Free",
        "suggestion_count": 1,
        "suggestion_result": [
            "FreedomBox for Communities_Offline Wikipedia "
            "- Wikibooks, open books for an open world.html"
        ],
        "search_string": "main",
        "search_count": 2,
        "search_result": [
            "Wikibooks.html",
            "FreedomBox for Communities_Offline Wikipedia "
            "- Wikibooks, open books for an open world.html",
        ],
        "test_path": "FreedomBox for Communities_Offline Wikipedia - Wikibooks, "
        "open books for an open world.html",
        "test_title": "FreedomBox for Communities/Offline Wikipedia - Wikibooks, "
        "open books for an open world",
        "test_mimetype": "text/html",
        "test_size": 52771,
        "test_content_includes": "looking forward to your contributions.",
        "test_redirect": None,
        "test_redirect_to": None,
    },
    "corner_cases%23%26.zim": {
        "filename": "corner_cases%23%26.zim",
        "filesize": 35991,
        "new_ns": True,
        "mutlipart": False,
        "zim_uuid": "702abcbe6fe926152f5d451af7986437",
        "metadata_keys": [
            "Counter",
            "Date",
            "Illustration_48x48@1",
            "Language",
            "Scraper",
            "Tags",
            "Title",
        ],
        "test_metadata": "Title",
        "test_metadata_value": "ZIM corner cases",
        "has_main_entry": True,
        "has_favicon_entry": True,
        "has_fulltext_index": False,
        "has_title_index": True,
        "has_checksum": True,
        "checksum": None,
        "is_valid": True,
        "entry_count": 7,
        "all_entry_count": 18,
        "article_count": 2,
        "media_count": 1,
        "suggestion_string": "c#",
        "suggestion_count": 1,
        "suggestion_result": ["c#.html"],
        "search_string": None,
        "search_count": 0,
        "search_result": [],
        "test_path": "empty.html",
        "test_title": "empty.html",
        "test_mimetype": "text/html",
        "test_size": 0,
        "test_content_includes": "",
        "test_redirect": None,
        "test_redirect_to": None,
    },
    "small.zim": {
        "filename": "small.zim",
        "filesize": 41155,
        "new_ns": True,
        "mutlipart": False,
        "zim_uuid": "3581ae7eedd57e6cd2f1c0cab073643f",
        "metadata_keys": [
            "Counter",
            "Creator",
            "Date",
            "Description",
            "Illustration_48x48@1",
            "Language",
            "Publisher",
            "Scraper",
            "Tags",
            "Title",
        ],
        "test_metadata": "Title",
        "test_metadata_value": "Test ZIM file",
        "has_main_entry": True,
        "has_favicon_entry": True,
        "has_fulltext_index": False,
        "has_title_index": True,
        "has_checksum": True,
        "checksum": None,
        "is_valid": True,
        "entry_count": 2,
        "all_entry_count": 16,
        "article_count": 1,
        "media_count": 1,
        "suggestion_string": None,
        "suggestion_count": None,
        "suggestion_result": None,
        "search_string": None,
        "search_count": None,
        "search_result": None,
        "test_path": "main.html",
        "test_title": "Test ZIM file",
        "test_mimetype": "text/html",
        "test_size": 207,
        "test_content_includes": "Test ZIM file",
        "test_redirect": None,
        "test_redirect_to": None,
    },
}
# Marker skipping a test whenever the OFFLINE environment variable is set to a
# non-empty value (the test needs ZIM files downloaded by `all_zims`).
skip_if_offline = pytest.mark.skipif(
    bool(os.environ.get("OFFLINE")),
    reason="OFFLINE environ requested offline-only",
)
def get_pytest_param(name, *fields):
    """Build a ``pytest.param`` with the values of ``fields`` for ZIM ``name``.

    A field absent from the record is replaced by a "MISSING-VALUE <field>" string.
    """

    def lookup(field):
        return ZIMS_DATA[name].get(field, f"MISSING-VALUE {field}")

    return pytest.param(*map(lookup, fields))
def get_pytest_params_list(*fields):
    """Return one ``pytest.param`` per ZIM in ``ZIMS_DATA``, in declaration order."""
    params = []
    for zim_name in ZIMS_DATA:
        params.append(get_pytest_param(zim_name, *fields))
    return params
def parametrize_for(fields):
    """Return the ``(argnames, argvalues)`` pair to unpack into ``parametrize``.

    ``argnames`` is the comma-joined ``fields``; ``argvalues`` holds one
    parameter set per ZIM of ``ZIMS_DATA``.
    """
    argnames = ", ".join(fields)
    argvalues = get_pytest_params_list(*fields)
    return argnames, argvalues
@pytest.fixture(scope="module")
def all_zims(tmpdir_factory):
    """creates a temp dir with all ZIM files inside:
    - downloaded ones (libkiwix test data and zim-testing-suite)
    - blank one created with pylibzim

    Returns the directory as a ``pathlib.Path``."""
    temp_dir = tmpdir_factory.mktemp("data")

    libzim_urls = []
    for name in ("zimfile.zim", "example.zim", "corner_cases%23%26.zim"):
        libzim_urls.append(
            f"https://github.com/kiwix/libkiwix/raw/main/test/data/{name}"
        )
    libzim_urls.append(
        "https://github.com/openzim/zim-testing-suite/raw/main/data/nons/small.zim"
    )

    # download libzim tests; each file keeps the last segment of its URL as name
    for url in libzim_urls:
        target = temp_dir / os.path.basename(url)
        urlretrieve(url, target)  # noqa: S310 # nosec

    # create blank using pylibzim: open then close the creator without adding anything
    with libzim.writer.Creator(temp_dir / "blank.zim"):
        pass

    return pathlib.Path(temp_dir)
def test_open_badfile(tmpdir):
    """Archive() raises RuntimeError for a missing file and for a non-ZIM file."""
    missing = tmpdir / "not-exist.zim"
    with pytest.raises(RuntimeError):
        Archive(missing)

    not_a_zim = tmpdir / "not-zim.zim"
    with open(not_a_zim, "w") as fh:
        fh.write("text file")
    with pytest.raises(RuntimeError):
        Archive(not_a_zim)
@skip_if_offline
def test_content_ref_keep(all_zims):
    """Get the memoryview on a content and lose the reference on the article.

    We then load a lot of other articles to detect a possible use of a
    dangling pointer, and finally check the memoryview is still intact.
    """
    archive = Archive(all_zims / "zimfile.zim")
    content = None

    def get_content():
        # entry and item are local: they go out of scope when this returns
        nonlocal content
        entry = archive.get_entry_by_path("A/That_Lucky_Old_Sun")
        item = entry.get_item()
        assert isinstance(item.content, memoryview)
        content = item.content

    get_content()  # Now we have a content but no reference to the entry/item.
    gc.collect()

    # Load a lot of content
    for i in range(0, archive.entry_count, 2):
        entry = archive._get_entry_by_id(i)
        if not entry.is_redirect:
            _ = entry.get_item().content

    # Check everything is ok
    assert content and len(content) == 3559
    # NOTE(review): the markup inside this literal was lost in this copy of the
    # file (leaving an unterminated string); it has been reconstructed as a
    # 100-byte prefix to match the slice -- confirm against the real content of
    # "A/That_Lucky_Old_Sun" in zimfile.zim.
    assert content and (
        bytes(content[:100]) == b'<!DOCTYPE html>\n<html class="client-js"><head>\n  '
        b'<meta charset="UTF-8">\n  <title>That Lucky Old Sun<'
    )
@skip_if_offline
@pytest.mark.parametrize(
    *parametrize_for(["filename", "filesize", "new_ns", "mutlipart", "zim_uuid"])
)
def test_reader_archive(all_zims, filename, filesize, new_ns, mutlipart, zim_uuid):
    """Archive-level properties match the file on disk and the expected data."""
    zim_path = all_zims / filename
    archive = Archive(zim_path)

    # check externally verifiable data
    assert archive.filename == zim_path
    assert archive.filesize == os.path.getsize(zim_path)
    if filesize is not None:
        assert archive.filesize == filesize
    assert archive.has_new_namespace_scheme is new_ns
    assert archive.is_multipart is mutlipart
    assert str(zim_path) in str(archive)

    # ensure uuid is returned
    archive_uuid = archive.uuid
    assert isinstance(archive_uuid, uuid.UUID)
    assert len(archive.uuid.hex) == 32
    if zim_uuid:
        assert archive.uuid.hex == zim_uuid
@skip_if_offline
@pytest.mark.parametrize(
    *parametrize_for(
        ["filename", "metadata_keys", "test_metadata", "test_metadata_value"]
    )
)
def test_reader_metadata(
    all_zims, filename, metadata_keys, test_metadata, test_metadata_value
):
    """Metadata keys match; the sample key, when given, has the expected value."""
    archive = Archive(all_zims / filename)

    # the list of keys must match the expected one exactly
    assert archive.metadata_keys == metadata_keys
    if not test_metadata:
        return

    raw_value = archive.get_metadata(test_metadata)
    assert raw_value.decode("UTF-8") == test_metadata_value
    item = archive.get_metadata_item(test_metadata)
    assert item.mimetype in ("text/plain", "text/plain;charset=utf-8")  # newer
    assert item.size > 1
@skip_if_offline
@pytest.mark.parametrize(
    *parametrize_for(["filename", "new_ns", "has_main_entry", "has_favicon_entry"])
)
def test_reader_main_favicon_entries(
    all_zims, filename, new_ns, has_main_entry, has_favicon_entry
):
    """Main entry and 48px illustration are present (or absent) as expected."""
    archive = Archive(all_zims / filename)

    # main entry: present or absent as declared for this ZIM
    assert archive.has_main_entry is has_main_entry
    if has_main_entry:
        assert archive.main_entry
        if new_ns:
            assert archive.main_entry.path == "mainPage"
    else:
        with pytest.raises(RuntimeError):
            assert archive.main_entry

    # favicon (48px illustration): present or absent as declared for this ZIM
    assert archive.has_illustration(48) is has_favicon_entry
    if has_favicon_entry:
        assert 48 in archive.get_illustration_sizes()
        assert archive.get_illustration_item()
        if new_ns:
            assert archive.get_illustration_item().path == "Illustration_48x48@1"
            assert archive.get_illustration_sizes() == {48}
            illustration_meta = archive.get_metadata_item("Illustration_48x48@1")
            assert illustration_meta.mimetype == "image/png"
    else:
        with pytest.raises(KeyError):
            assert archive.get_illustration_item(48)
@skip_if_offline
@pytest.mark.parametrize(
    *parametrize_for(["filename", "has_fulltext_index", "has_title_index"])
)
def test_reader_has_index(all_zims, filename, has_fulltext_index, has_title_index):
    """Fulltext and title index flags match the expected data."""
    archive = Archive(all_zims / filename)
    expectations = (
        (archive.has_fulltext_index, has_fulltext_index),
        (archive.has_title_index, has_title_index),
    )
    for actual, expected in expectations:
        assert actual is expected
@skip_if_offline
@pytest.mark.parametrize(*parametrize_for(["filename", "has_checksum", "is_valid"]))
def test_reader_checksum(all_zims, filename, has_checksum, is_valid):
    """The checksum is exposed as a str and the archive integrity check matches.

    A ZIM with a checksum must report a 32-character string; one without is
    expected to report an empty string.
    """
    zim = Archive(all_zims / filename)
    # verify checksum
    assert zim.has_checksum is has_checksum
    assert isinstance(zim.checksum, str)
    # parenthesized: without them the conditional expression wraps the whole
    # comparison and the assert reduces to `assert 0` when has_checksum is False
    assert len(zim.checksum) == (32 if has_checksum else 0)
    assert zim.checksum != zim.uuid
    assert zim.check() is is_valid
@skip_if_offline
@pytest.mark.parametrize(
    *parametrize_for(
        [
            "filename",
            "entry_count",
            "all_entry_count",
            "article_count",
            "media_count",
            "has_fulltext_index",
            "suggestion_string",
            "suggestion_count",
            "suggestion_result",
            "search_string",
            "search_count",
            "search_result",
        ]
    )
)
def test_reader_suggest_search(
    all_zims,
    filename,
    entry_count,
    all_entry_count,
    article_count,
    media_count,
    has_fulltext_index,
    suggestion_string,
    suggestion_count,
    suggestion_result,
    search_string,
    search_count,
    search_result,
):
    """Counters, fulltext search and title suggestions match the expected data."""
    archive = Archive(all_zims / filename)

    # counters
    assert archive.entry_count == entry_count
    assert archive.all_entry_count == all_entry_count
    assert archive.article_count == article_count
    assert archive.media_count == media_count

    # fulltext search needs both an index and a query to run
    can_search = has_fulltext_index and search_string is not None
    if can_search:
        query = Query().set_query(search_string)
        searcher = Searcher(archive)
        found = searcher.search(query)
        assert found.getEstimatedMatches() == search_count
        assert list(found.getResults(0, search_count)) == search_result

    # suggestions only need a query
    if suggestion_string is not None:
        suggestion_searcher = SuggestionSearcher(archive)
        suggested = suggestion_searcher.suggest(suggestion_string)
        assert suggested.getEstimatedMatches() == suggestion_count
        assert list(suggested.getResults(0, suggestion_count)) == suggestion_result
@skip_if_offline
@pytest.mark.parametrize(
    *parametrize_for(
        [
            "filename",
            "test_path",
            "test_title",
            "test_mimetype",
            "test_size",
            "test_content_includes",
        ]
    )
)
def test_reader_get_entries(
    all_zims,
    filename,
    test_path,
    test_title,
    test_mimetype,
    test_size,
    test_content_includes,
):
    """Check entry lookup by path and by title, plus the matching item.

    Unknown paths and unknown titles must raise ``KeyError``. When a test path
    is provided, the entry and its item are compared with the expected title,
    path, mimetype, size and content excerpt.
    """
    zim = Archive(all_zims / filename)

    # lookup by path: an unknown path is an error
    with pytest.raises(KeyError):
        zim.get_entry_by_path("___missing")

    if test_path:
        assert zim.has_entry_by_path(test_path)

        entry = zim.get_entry_by_path(test_path)
        assert entry.title == test_title
        assert entry.path == test_path
        entry_repr = str(entry)
        assert test_path in entry_repr
        assert test_title in entry_repr

        item = entry.get_item()
        assert item.title == test_title
        assert item.path == test_path
        item_repr = str(item)
        assert test_path in item_repr
        assert test_title in item_repr
        assert item.mimetype == test_mimetype
        assert item.size == test_size

        # content is exposed as a memoryview; decode a bytes copy to search it
        raw_content = item.content
        assert isinstance(raw_content, memoryview)
        decoded_content = bytes(raw_content).decode("UTF-8")
        assert test_content_includes in decoded_content

    # lookup by title: an unknown title is an error
    with pytest.raises(KeyError):
        zim.get_entry_by_title("___missing")

    # example.zim cannot be queried by title as all its entries have been created
    # with empty titles but the ZIM contains a v1 title listing.
    title_is_queryable = test_title and filename != "example.zim"
    if title_is_queryable:
        assert zim.has_entry_by_title(test_title)
        # NOTE(review): `entry` is only bound when test_path is truthy; this
        # relies on the test data always pairing a title with a path.
        assert zim.get_entry_by_title(test_title).path == entry.path
@skip_if_offline
@pytest.mark.parametrize(
    *parametrize_for(["filename", "test_redirect", "test_redirect_to"])
)
def test_reader_redirect(all_zims, filename, test_redirect, test_redirect_to):
    """Check that a redirect entry is flagged as such and resolves correctly.

    Nothing is checked when no redirect path is provided; the target checks
    are skipped when no expected target is provided.
    """
    zim = Archive(all_zims / filename)

    if not test_redirect:
        return

    assert zim.get_entry_by_path(test_redirect).is_redirect

    if not test_redirect_to:
        return

    redirect_entry = zim.get_entry_by_path(test_redirect)
    assert redirect_entry.get_redirect_entry().path == test_redirect_to
    # make sure get_item resolves it
    assert redirect_entry.get_item().path == test_redirect_to
    # should be last redirect
    assert redirect_entry.get_redirect_entry().is_redirect is False
    # asking a non-redirect entry for its redirect target is an error
    with pytest.raises(RuntimeError):
        redirect_entry.get_redirect_entry().get_redirect_entry()
@skip_if_offline
@pytest.mark.parametrize(*parametrize_for(["filename"]))
def test_reader_by_id(all_zims, filename):
    """Check that entries fetched by index report that same index.

    For each index, the entry must expose it through ``_index`` and its item
    must expose a non-negative ``_index``.
    """
    zim = Archive(all_zims / filename)

    # test index access
    # The whole range is walked: the previous bound (entry_count - 1) stopped
    # one short and never exercised the last index.
    # NOTE(review): assumes every index below entry_count is addressable
    # through _get_entry_by_id — confirm against the libzim index semantics.
    for index in range(zim.entry_count):
        assert zim._get_entry_by_id(index)._index == index
        assert zim._get_entry_by_id(index).get_item()._index >= 0
@skip_if_offline
def test_archive_equality(all_zims):
    """Check ``==`` / ``!=`` between an archive and various other objects.

    Compared against: another archive on a different file, another archive on
    the same file, an unrelated object carrying the same ``filename``, a plain
    subclass, and a subclass overriding ``filename``.
    """

    class Different:
        # not an Archive, merely exposes a `filename` attribute
        def __init__(self, filename):
            self.filename = filename

    class Sub(Archive):
        # plain subclass, nothing overridden
        pass

    class Sub2(Archive):
        # subclass whose `filename` no longer matches the opened file
        @property
        def filename(self):
            return 1

    same_path = all_zims / "zimfile.zim"
    other_path = all_zims / "example.zim"
    zim = Archive(same_path)

    assert zim != Archive(other_path)
    assert zim == Archive(same_path)
    assert zim != Different(same_path)
    assert zim == Sub(same_path)
    assert zim != Sub2(same_path)
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1728999045.0
libzim-3.6.0/tests/test_libzim_version.py 0000644 0001751 0000177 00000001664 14703467205 020240 0 ustar 00runner docker import re
import sys
from libzim.version import ( # pyright: ignore [reportMissingModuleSource]
get_libzim_version,
get_versions,
print_versions,
)
def test_version_print_version_with_stdout(capsys):
    """Call ``print_versions()`` with its default stream and check stdout.

    NOTE(review): the blank ``print`` below already writes a newline to
    stdout, so the final assertion holds even if ``print_versions`` wrote
    nothing to the captured stream.
    """
    print_versions()
    # extra flushed write so buffered output reaches the capture
    print("", file=sys.stdout, flush=True)

    captured = capsys.readouterr()
    assert captured.out
def test_version_print_version_with_stderr(capsys):
    """Call ``print_versions`` with ``sys.stderr`` and check stderr.

    NOTE(review): the blank ``print`` below already writes a newline to
    stderr, so the final assertion holds even if ``print_versions`` wrote
    nothing to the captured stream.
    """
    print_versions(sys.stderr)
    # extra flushed write so buffered output reaches the capture
    print("", file=sys.stderr, flush=True)

    captured = capsys.readouterr()
    assert captured.err
def test_get_versions():
    """Check the shape of the mapping returned by ``get_versions``.

    It must be non-empty, contain a ``"libzim"`` key plus at least one other
    key, and map string names to string versions.
    """
    reported = get_versions()

    assert reported
    assert "libzim" in reported
    # libzim itself plus at least one other library
    assert len(reported) > 1

    for name, number in reported.items():
        assert isinstance(name, str)
        assert isinstance(number, str)
def test_get_libzim_version():
    """Check that the libzim version starts with ``MAJOR.MINOR.PATCH`` digits."""
    # libzim uses semantic versioning
    reported = get_libzim_version()
    # re.match anchors at the start only, so a suffix after PATCH is accepted
    assert re.match(r"\d+\.\d+\.\d+", reported)