phosh-osk-data-0.42.0/.gitignore
input/
out/
data/
output/
debian/.debhelper/
debian/debhelper-build-stamp
debian/files
debian/phosh-osk-data-packager/
debian/phosh-osk-data-packager.substvars
doc/*.1
phosh-osk-data-0.42.0/.gitlab-ci.yml
include:
- remote: 'https://gitlab.freedesktop.org/freedesktop/ci-templates/-/raw/34039cd573a2df832d465bc9e4c5f543571f5241/templates/ci-fairy.yml'
stages:
- build
- deploy
default:
# Protect CI infra from rogue jobs
timeout: 15 minutes
# Allow jobs to be canceled on new commits
interruptible: true
# Retry on infra hiccups automatically
retry:
max: 1
when:
- 'api_failure'
- 'runner_system_failure'
- 'scheduler_failure'
- 'stuck_or_timeout_failure'
variables:
# For ci-fairy
FDO_UPSTREAM_REPO: guidog/phosh-osk-data
DEBIAN_IMAGE: $CI_REGISTRY/guidog/phosh-osk-stub/debian:v0.0.2024-06-19
.prep: &prep
before_script:
- apt-get -y update
- apt-get -y build-dep .
.step: &build_step
script:
- mkdir -p out/
- make check
- dpkg-buildpackage -uc -us -rfakeroot -A
- 'cp ../phosh-osk-data-*_$(dpkg-parsechangelog -SVersion)_all.deb out/'
# Sanity checks of MR settings and commit logs
sanity:
extends:
- .fdo.ci-fairy
stage: build
script: |
if [ -n "$CI_OPEN_MERGE_REQUESTS" ]; then
ci-fairy check-commits --junit-xml=commit-message-junit-report.xml cifairy/main..HEAD ;
else
echo "Not a merge request" ;
fi
artifacts:
reports:
junit: commit-message-junit-report.xml
only:
variables:
- $CI_OPEN_MERGE_REQUESTS && $PKG_ONLY != "1"
build:native-debian-trixie:
stage: build
image: ${DEBIAN_IMAGE}
<<: *prep
<<: *build_step
artifacts:
paths:
- out/
phosh-osk-data-0.42.0/Makefile
PY_SCRIPTS = \
pod-db-from-wiki-dump \
phosh-osk-data-packager \
$(NULL)
check:
flake8 --format=pylint $(PY_SCRIPTS)
man:
$(MAKE) -C doc
clean:
$(MAKE) -C doc clean
phosh-osk-data-0.42.0/NEWS
phosh-osk-data 0.42.0
---------------------
Released: October 2024
* Move to databases built from Wikipedia dumps
* Allow building packages from the db files for local installation
phosh-osk-data 0.25.0
---------------------
Released: February 2023
* Initial release with data for de, it and sv
phosh-osk-data-0.42.0/README.md
# phosh osk data
Scripts to build word prediction data for [phosh-osk-stub][] and other
presage-based completers. The aim is to have models that are distributable
without licensing issues and that reflect modern language use, which is why
the databases are built from Wikipedia dumps.
## Building your own dictionaries based on Wikipedia data
Get a host with enough disk space (~40G); more cores make the first steps
(extraction and parsing into sentences) significantly faster.
You can then provision it with the provided ansible playbook on your
cloud provider of choice:
```sh
ansible-playbook -v -i "${BUILDER}", -u root builder/setup.yml
```
`${BUILDER}` is the IP or hostname of the host to provision.
Once there, get the Wikipedia dump:
```sh
ssh ${BUILDER}
cd output/
export LANG=es
wget "https://dumps.wikimedia.org/${LANG}wiki/latest/${LANG}wiki-latest-pages-articles.xml.bz2"
```
Import some nltk data:
```
python3 -c "import nltk; nltk.download('punkt')"
```
Process the dump:
```
./pod-db-from-wiki-dump --processes 4 --language "${LANG}" --dump "output/${LANG}wiki-latest-pages-articles.xml.bz2" --output "output/${LANG}"
```
You'll then get a database usable by presage-based completers in `output/${LANG}/database_${LANG}.db`.
Processing happens in several steps, so should a step fail you can skip the
already completed ones in subsequent runs; see the `--skip-*` options. The
extraction and parsing steps run in parallel and can be spread over multiple
cores (default `8`).
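Should, for example, only the n-gram step have failed, a re-run skipping the
already completed steps might look like this (same paths as above):
```sh
./pod-db-from-wiki-dump --processes 4 --language "${LANG}" \
    --dump "output/${LANG}wiki-latest-pages-articles.xml.bz2" \
    --output "output/${LANG}" --skip-extract --skip-parse
```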
## Installing the data
See the [phosh-osk-data-packager manpage](doc/phosh-osk-data-packager.rst).
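For example, downloading, packaging and installing the data for German could
look like this (assuming `phosh-osk-data-packager` is installed and in your
`PATH`):
```sh
phosh-osk-data-packager --language=de --pkg=deb --install
```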
## Related projects
- presage:
- sfos presage databases:
- phosh-osk-stub:
[phosh-osk-stub]: https://gitlab.gnome.org/guidog/phosh-osk-stub
phosh-osk-data-0.42.0/builder/setup.yml
- name: Setup Phosh OSK Data wiki builder
gather_facts: false
hosts: all
vars:
pod_user: pod-builder
pod_home: "/home/pod-builder"
wikiextractorpkg: "wikiextractor_3.0.7-1_all.deb"
tasks:
- name: Add packages
ansible.builtin.apt:
pkg:
- htop
- kitty-terminfo
- python3-nltk
- python3-tqdm
- screen
- sqlite3
- vim-nox
- wget
- name: Add user
ansible.builtin.user:
name: "{{ pod_user }}"
system: true
create_home: true
shell: /usr/sbin/nologin
home: "{{ pod_home }}"
- name: Download wikiextractor until in Debian
ansible.builtin.get_url:
url: "https://people.debian.org/~agx/wikiextractor/{{ wikiextractorpkg }}"
dest: "{{ pod_home }}/{{ wikiextractorpkg }}"
- name: Install wikiextractor
ansible.builtin.command: dpkg -i "{{ pod_home }}/{{ wikiextractorpkg }}"
- name: Copy script
ansible.builtin.copy:
src: ../pod-db-from-wiki-dump
dest: "{{ pod_home }}/pod-db-from-wiki-dump"
owner: pod-builder
mode: '0755'
- name: Creates directory
ansible.builtin.file:
path: "{{ pod_home }}/output"
state: directory
owner: "{{ pod_user }}"
- name: Check for wiki data volume
ansible.builtin.stat:
path: "/dev/disk/by-label/wiki-data"
register: d
- name: Mount data volume
ansible.posix.mount:
path: "{{ pod_home }}/output"
src: "/dev/disk/by-label/wiki-data"
state: mounted
fstype: ext4
when: d.stat.islnk is defined and d.stat.islnk
- name: Allow access to output dir
ansible.builtin.file:
path: "{{ pod_home }}/output"
state: directory
owner: "{{ pod_user }}"
phosh-osk-data-0.42.0/data/.gitkeep
phosh-osk-data-0.42.0/debian/changelog
phosh-osk-data (0.42.0) experimental; urgency=medium
* Release 0.42.0
-- Guido Günther Mon, 21 Oct 2024 23:00:26 +0200
phosh-osk-data (0.42.0~rc1) experimental; urgency=medium
* build: Clean tmp/ too
* ci: Use a more recent image
* treewide: Allow to use Wikipedia dumps to build the corpus.
This allows us to use DFSG free and modern CC BY-SA 4.0 data.
We also provide a small ansible role to set up a host for that and a
script that downloads and packages the corpus into a distribution
package (currently deb only).
-- Guido Günther Wed, 09 Oct 2024 15:53:28 +0200
phosh-osk-data (0.25.0) experimental; urgency=medium
* Initial release.
-- Guido Günther Tue, 28 Feb 2023 15:57:15 +0100
phosh-osk-data-0.42.0/debian/control
Source: phosh-osk-data
Section: contrib/text
Priority: optional
Maintainer: Guido Günther
Rules-Requires-Root: no
Build-Depends:
debhelper-compat (= 13),
flake8,
python3-docutils,
python3-nltk,
wget,
Standards-Version: 4.7.0
Package: phosh-osk-data-packager
Architecture: all
Depends:
python3-requests,
python3-tqdm,
${shlibs:Depends},
${misc:Depends},
Recommends:
pkexec,
Suggests:
phosh-osk-stub,
Description: OSK completion data packager for Phosh
Data to enable text completion in phosh-osk-stub via presage.
.
This package contains a script to download and package data
files.
phosh-osk-data-0.42.0/debian/copyright
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Source: https://gitlab.gnome.org/guidog/phosh-osk-data
Upstream-Name: phosh-osk-data
Upstream-Contact: Guido Günther
Files:
*
Copyright:
2024 Guido Günther
License: GPL-2+
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Comment:
On Debian systems, the complete text of the GNU General
Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
phosh-osk-data-0.42.0/debian/gbp.conf
[DEFAULT]
debian-branch = main
debian-tag = v%(version)s
debian-tag-msg = %(pkg)s v%(version)s
[tag]
sign-tags = true
phosh-osk-data-0.42.0/debian/phosh-osk-data-packager.install
./phosh-osk-data-packager /usr/bin/
phosh-osk-data-0.42.0/debian/phosh-osk-data-packager.manpages
doc/*.1
phosh-osk-data-0.42.0/debian/rules
#!/usr/bin/make -f
%:
dh $@
override_dh_auto_build:
make man
phosh-osk-data-0.42.0/debian/source/format
3.0 (native)
phosh-osk-data-0.42.0/doc/Makefile
MANPAGES = \
phosh-osk-data-packager.1 \
$(NULL)
%.1: %.rst
rst2man --syntax-highlight=none $< > $@.tmp
mv $@.tmp $@
doc: $(MANPAGES)
clean:
rm -f *.1
phosh-osk-data-0.42.0/doc/phosh-osk-data-packager.rst
.. _phosh-osk-data-packager(1):
=======================
phosh-osk-data-packager
=======================
-----------------------------
Download and package OSK data
-----------------------------
SYNOPSIS
--------
| **phosh-osk-data-packager** [OPTIONS...]
DESCRIPTION
-----------
``phosh-osk-data-packager`` downloads and packages data for Phosh's on screen
keyboards (OSKs). It currently supports downloading and packaging data
for completion using the presage library.
OPTIONS
-------
``-h``, ``--help``
Print help and exit
``--language=LANGUAGE``
The language code of the language to process. E.g. `de` or `sv`.
``--engine=ENGINE``
The completion engine to download data for. The only currently supported
engine is `presage` using its sqlite backend.
``--pkg=FORMAT``
The packaging format to build a package for. The only currently supported
format is `deb` building a package for Debian based distributions.
``--install``
Whether to install the package
EXAMPLE
-------
This downloads and packages data for Swedish and puts the resulting package
into the current directory:
::
./phosh-osk-data-packager --language=sv
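To also install the generated package (installation is done via ``pkexec``
and ``dpkg``):
::
./phosh-osk-data-packager --language=sv --install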
SEE ALSO
--------
``phosh-osk-stub(1)``
phosh-osk-data-0.42.0/phosh-osk-data-packager
#!/usr/bin/python3
#
# Copyright (C) The Phosh Developers
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Author: Guido Günther
#
# Build a package for the given OSK data
import sys
import argparse
import subprocess
import requests
import shutil
from tempfile import TemporaryDirectory
from tqdm import tqdm
from pathlib import Path
VERSION = '0.42.0'
URL = 'https://data.phosh.mobi/osk-data'
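# Rough flow of the script: download_db() streams the prebuilt database for
# the requested language from URL, build_deb() wraps it into a minimal binary
# package (the database lands in /usr/share/phosh/osk/<engine>/ and the
# DEBIAN/control file is written by hand before dpkg-deb assembles the .deb)
# and install_deb() optionally installs the result via pkexec and dpkg.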
def build_deb(dbfile, dir, engine, lang):
pkgname = f'phosh-osk-data-{lang}'
pkgdir = Path(dir) / pkgname
dbdir = pkgdir / 'usr' / 'share' / 'phosh' / 'osk' / engine
debian = pkgdir / 'DEBIAN'
control = debian / 'control'
dbdir.mkdir(parents=True)
debian.mkdir(parents=True)
shutil.move(dbfile, dbdir)
with open(control, 'w') as f:
f.write(f"""Package: {pkgname}
Source: phosh-osk-data
Version: {VERSION}-1
Architecture: all
Maintainer: Guido Günther
Suggests: phosh-osk-stub
Breaks: phosh-osk-data-eu (<< 0.42)
Replaces: phosh-osk-data-eu (<< 0.42)
Section: text
Priority: optional
Description: OSK completion data for phosh - {lang}
Data to enable text completion in phosh-osk-stub via {engine}.
.
This package contains the data files for {lang}.
.
It was generated via phosh-osk-data-packager.
""")
subprocess.check_call(["dpkg-deb", '-b', pkgdir, dir])
deb = Path(dir) / f'{pkgname}_{VERSION}-1_all.deb'
if not deb.exists():
raise Exception(f"Deb {deb} not created")
return deb
def download_db(dir, engine, lang):
dbname = f'database_{lang}.db'
dbfile = Path(dir) / dbname
url = URL + f'/{VERSION}/{engine}/{dbname}'
response = requests.get(url, stream=True)
if response.status_code == 404:
print(f"No datase found for '{lang}'", file=sys.stderr)
return None
response.raise_for_status()
total = int(response.headers.get("content-length", 0))
with tqdm(total=total, unit="B", unit_scale=True) as progress_bar:
with open(dbfile, "wb") as f:
for data in response.iter_content(4096):
progress_bar.update(len(data))
f.write(data)
return dbfile
def install_deb(pkg):
try:
subprocess.check_call(["pkexec", "dpkg", "-i", str(pkg)])
except Exception as e:
print(f"Failed to install package: {e}", file=sys.stderr)
def main():
parser = argparse.ArgumentParser(
description="Download and Build a package for the given OSK data"
)
parser.add_argument(
"--language", type=str, default="en", help="Language to use"
)
parser.add_argument(
"--engine", type=str, default='presage', choices=['presage'], help="Completion engine"
)
parser.add_argument(
"--pkg", type=str, default='deb', choices=['deb'], help="Package format"
)
parser.add_argument(
"--install", action="store_true", help="Install the built package"
)
args = parser.parse_args()
with TemporaryDirectory(prefix="pod-build-pkg") as dir:
dbfile = download_db(dir, args.engine, args.language)
if not dbfile:
return 1
if args.pkg == 'deb':
pkg = build_deb(dbfile, dir, args.engine, args.language)
else:
print(f"Unsupported packaging format '{args.pkg}'", file=sys.stderr)
return 1
if args.install:
if args.pkg == 'deb':
install_deb(pkg)
else:
print(f"Don't know how to install '{args.pkg}'", file=sys.stderr)
return 1
else:
shutil.move(pkg, '.')
return 0
if __name__ == "__main__":
sys.exit(main())
phosh-osk-data-0.42.0/pod-db-from-wiki-dump
#!/usr/bin/python3
#
# Copyright (C) The Phosh Developers
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Author: Guido Günther
#
# The extraction is based on the MIT licensed
# https://github.com/mpoyraz/ngram-lm-wiki
import fileinput
import os
import sys
import re
import json
import argparse
import random
import subprocess
import sqlite3
from tqdm import tqdm
from multiprocessing import Pool
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.util import ngrams
from pathlib import Path
from collections import defaultdict
# Tokenize to sentences
tokenize_fn = None
# Lowercase a sentence
lower_fn = None
# Drop an unwanted word when counting n-grams
drop_word_fn = None
# Chars to remove from wiki data
chars_to_remove_regex = r"[#$%&()*+,-./:;<=>?@\[\]^_{|}~!\"\\]"
apostrophes = "[’`´ʹʻʼʽʿˈ‘]"
# Maximum n-gram count
max_ngrams = 3
def parse_sentences_from_wiki_json_file(fpath):
with open(fpath) as fp:
texts = [json.loads(line.strip())["text"] for line in fp]
# Sentences from paragraphs
sentences = []
for text in texts:
for sent in tokenize_fn(text):
# Lower the sentence
sent = lower_fn(sent)
# Remove pre-defined chars
sent = re.sub(chars_to_remove_regex, "", sent)
# Unify apostrophes
sent = re.sub(apostrophes, "'", sent)
# Remove multiple spaces
sent = re.sub(r"\s+", " ", sent)
# Append
if len(sent) > 0:
sentences.append(sent)
return sentences
def extract_wiki_dump(extract_dir, wiki_dump, n_procs):
extractor = [
"wikiextractor",
wiki_dump,
"-o",
extract_dir,
"--no-templates",
"--json",
"--processes",
str(n_procs),
]
subprocess.check_call(extractor)
def parse_sentences(sentence_file, extract_dir, n_files, n_procs):
# Paths of the extracted wiki files
dirs = list(extract_dir.glob("[A-Z][A-Z]"))
filepaths = []
for i in range(n_files):
dir = random.choice(dirs)
subdir = list(dir.glob("wiki_??"))
f = random.choice(subdir)
print(f)
filepaths.append(f)
with open(sentence_file, "w") as f:
# Load each wiki files and parse sentences
with Pool(n_procs) as pool:
n_sentences = 0
for sentences in tqdm(
pool.imap(parse_sentences_from_wiki_json_file, filepaths), total=n_files):
for sent in sentences:
f.write(f"{sent}\n")
n_sentences += len(sentences)
print("Number of extracted sentences: {}".format(n_sentences))
def build_where_clause(words):
where_clause = "WHERE"
for i in range(len(words) - 1):
where_clause += f" word_{len(words) - i - 1} = '{words[i]}' AND"
where_clause += f" word = '{words[-1]}'"
return where_clause
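# build_ngrams() creates the presage-style sqlite schema, one table per
# n-gram order (with max_ngrams = 3):
#   _1_gram(word, count)
#   _2_gram(word_1, word, count)
#   _3_gram(word_2, word_1, word, count)
# Counts are inserted or updated after every sentence to keep memory usage
# low, committed every 100000 sentences and indexed once all sentences are
# processed.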
def build_ngrams(sentences, ngram_file, db_file):
if os.path.exists(db_file):
os.remove(db_file)
print("Creating database:")
con = sqlite3.connect(db_file)
# con.set_trace_callback(print)
cur = con.cursor()
for n in range(0, max_ngrams):
cols = ", ".join([f"word_{i} TEXT" for i in reversed(range(n + 1))]).replace(
"_0", ""
)
constraints = "UNIQUE({})".format(
", ".join([f"word_{i}" for i in range(n + 1)])).replace("_0", "")
table = f"_{n + 1}_gram"
cur.execute(f"CREATE TABLE {table}({cols}, count INTEGER, {constraints})")
print("Filling database tables:")
i = 0
for sentence in tqdm(sentences):
counts = defaultdict(int)
tokens = word_tokenize(sentence)
for n in range(1, max_ngrams + 1):
n_grams = ngrams(tokens, n)
for n_gram in n_grams:
for word in n_gram:
if drop_word_fn(word):
break
else:
counts[n_gram] += 1
# Insert after each sentence to keep memory usage under control
for key, count in counts.items():
table = f"_{len(key)}_gram"
words = ",".join([f"'{word}'" for word in key])
where = build_where_clause(key)
query = f"SELECT count FROM {table} {where}"
try:
res = cur.execute(query).fetchone()
except Exception:
print("Statement failed: %s", query)
raise
if res:
count = res[-1] + 1
stmt = f"UPDATE {table} SET count = {count} {where}"
else:
stmt = f"INSERT INTO {table} VALUES ({words}, {count})"
try:
cur.execute(stmt)
except Exception:
print("Statement failed: %s", stmt)
raise
if (i % 100000 == 0):
con.commit()
i += 1
con.commit()
# Create index
for n in range(0, max_ngrams):
word_cols = "({})".format(", ".join([f"word_{i}" for i in reversed(range(n + 1))])).replace("_0", "")
table = f"_{n + 1}_gram"
index = f"_{n + 1}_index"
stmt = f"CREATE UNIQUE INDEX {index} ON {table}{word_cols}"
cur.execute(stmt)
con.commit()
cur.execute("pragma optimize")
con.commit()
# TODO: drop rare items from tables
con.execute("VACUUM")
con.commit()
con.close()
def main():
parser = argparse.ArgumentParser(
description="Build ngram database from Wikipedia dumps"
)
parser.add_argument(
"--dump", type=str, required=True, help="Path to a wikipedia dump"
)
parser.add_argument("--output", type=str, required=True, help="Output directory")
parser.add_argument(
"--language", type=str, default="de", help="Language of the wikipedia dump"
)
parser.add_argument(
"--processes", type=int, default=8, help="Number of processes to use"
)
parser.add_argument(
"--skip-extract", default=False, action="store_true", help="Extract wiki data"
)
parser.add_argument(
"--files", type=int, default=10, help="Number of wiki files to use to build the DB"
)
parser.add_argument(
"--skip-parse",
default=False,
action="store_true",
help="Parse extraced wiki data into sentences",
)
parser.add_argument(
"--skip-presage-ngrams",
default=False,
action="store_true",
help="Build n-grams of built sentences for presage",
)
args = parser.parse_args()
global tokenize_fn, lower_fn, drop_word_fn
# Defaults for all languages
tokenize_fn = sent_tokenize
lower_fn = lambda x: x.lower()
drop_word_fn = lambda x: "'" in x
if args.language in ["de"]:
tokenize_fn = lambda x: sent_tokenize(x, language="german")
output_path = Path(args.output)
extract_dir = output_path / "extract"
sentence_file = output_path / "sentences.txt"
ngram_file = output_path / f"n-gram-{args.language}.txt"
db_file = output_path / f"database_{args.language}.db"
if not args.skip_extract:
print("Extracting Wiki source")
extract_wiki_dump(extract_dir, args.dump, args.processes)
if not args.skip_parse:
print("Parsing sentences")
sentences = parse_sentences(sentence_file, extract_dir, args.files, args.processes)
sentences = fileinput.input(sentence_file, encoding="utf-8")
if not args.skip_presage_ngrams:
print("Building N-grams")
build_ngrams(sentences, ngram_file, db_file)
return 0
if __name__ == "__main__":
sys.exit(main())
phosh-osk-data-0.42.0/setup.cfg
[flake8]
# E501: ignore line length
# E731: do not use lambda
ignore=E501,E731