pax_global_header00006660000000000000000000000064144557545730014535gustar00rootroot0000000000000052 comment=9c2d2ffecb3b04dc11decb3295ef46dd3ad023fd pantomime-0.6.1/000077500000000000000000000000001445575457300135325ustar00rootroot00000000000000pantomime-0.6.1/.bumpversion.cfg000066400000000000000000000001341445575457300166400ustar00rootroot00000000000000[bumpversion] current_version = 0.6.1 commit = True tag = True [bumpversion:file:setup.py] pantomime-0.6.1/.github/000077500000000000000000000000001445575457300150725ustar00rootroot00000000000000pantomime-0.6.1/.github/dependabot.yml000066400000000000000000000002101445575457300177130ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: pip directory: "/" schedule: interval: weekly open-pull-requests-limit: 99 pantomime-0.6.1/.github/workflows/000077500000000000000000000000001445575457300171275ustar00rootroot00000000000000pantomime-0.6.1/.github/workflows/build.yml000066400000000000000000000016571445575457300207620ustar00rootroot00000000000000name: build on: [push] jobs: python: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: Show ref run: | echo "$GITHUB_REF" - name: Set up Python uses: actions/setup-python@v1 with: python-version: "3.x" - name: Install dependencies env: DEBIAN_FRONTEND: noninteractive run: | pip install -e ".[dev]" - name: Run the tests run: | make test - name: Check type annotations (strict) run: | make typecheck - name: Build a distribution run: | python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@master with: user: __token__ password: ${{ secrets.pypi_password }} pantomime-0.6.1/.gitignore000066400000000000000000000001011445575457300155120ustar00rootroot00000000000000.coverage *.pyc dist/ build/ *.egg-info .vscode/ coverage-report/pantomime-0.6.1/.travis.yml000066400000000000000000000006621445575457300156470ustar00rootroot00000000000000language: python python: - "3.6" before_install: - sudo apt-get -qq update - sudo apt-get install -y libicu-dev install: - pip install coverage nose pyicu twine - pip install -e . script: - nosetests --with-coverage --cover-package=pantomime --cover-erase after_success: # push tag as well - if [[ $TRAVIS_TAG = $TRAVIS_BRANCH ]]; then python setup.py sdist bdist_wheel ; twine upload dist/* ; fi pantomime-0.6.1/LICENSE000066400000000000000000000020711445575457300145370ustar00rootroot00000000000000Copyright (c) 2018: Journalism Development Network, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pantomime-0.6.1/MANIFEST.in000066400000000000000000000000741445575457300152710ustar00rootroot00000000000000include README.md include LICENSE include pantomime/py.typedpantomime-0.6.1/Makefile000066400000000000000000000006501445575457300151730ustar00rootroot00000000000000 all: clean test dists release test: pytest --cov-report html --cov-report term --cov=pantomime tests/ typecheck: mypy --strict pantomime/ dists: clean python setup.py sdist bdist_wheel release: dists twine upload dist/* clean: rm -rf dist build .eggs find . -name '*.egg-info' -exec rm -fr {} + find . -name '*.egg' -exec rm -f {} + find . -name '*.pyc' -exec rm -f {} + find . -name '*.pyo' -exec rm -f {} + pantomime-0.6.1/README.md000066400000000000000000000024471445575457300150200ustar00rootroot00000000000000# pantomime [![build](https://github.com/alephdata/pantomime/actions/workflows/build.yml/badge.svg)](https://github.com/alephdata/pantomime/actions/workflows/build.yml) ``pantomime`` is a small library that handles the parsing and normalisation of internet MIME types in Python. This can be useful to normalise invalid, or misformatted MIME types emitted by remote web servers. ## Usage The simplest use is to normalise a MIME type: ```python from pantomime import normalize_mimetype assert normalize_mimetype('TEXT/PLAIN') == 'text/plain' assert normalize_mimetype('plain/text') == 'text/plain' assert normalize_mimetype(None) == 'application/octet-stream' assert normalize_mimetype('') == 'application/octet-stream' ``` Internally, `pantomime` uses a `MIMEType` object to handle parsing. It can be used to access more specific information, like human readable labels: ```python from pantomime import parse_mimetype parsed = parse_mimetype('text/plain') assert parsed.family == 'text' assert parsed.subtype == 'plain' assert parsed.label == 'Plain text' ``` ## Open issues * Internationalisation, i.e. make the human-readable labels available in multiple languages. * Expand replacements for specific MIME types. ## License Licensed under MIT terms, see the ``LICENSE`` file included in this repository.pantomime-0.6.1/contrib/000077500000000000000000000000001445575457300151725ustar00rootroot00000000000000pantomime-0.6.1/contrib/occrp.csv000066400000000000000000000246311445575457300170230ustar00rootroot00000000000000application/pdf;8835746 message/rfc822;3443845 text/html;2990954 text/plain;1496825 image/jpeg;1292700 image/png;938072 text/csv;581702 application/xml+opfmessage;552849 application/octet-stream;453592 text/xml;439849 inode/directory;405202 application/msword;380408 ;270829 image/gif;248397 application/x-java-applet;240467 application/vnd.openxmlformats-officedocument.wordprocessingml.document;206014 image/tiff;145498 inode/x-empty;104586 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;104208 application/vnd.ms-excel;96965 image/vnd.djvu;86373 application/zip;72048 image/webp;60878 text/rtf;36756 application/vnd.ms-outlook;33290 application/x-dosexec;26946 application/rar;22160 application/x-zip-compressed;22021 text/calendar;21089 application/vnd.openxmlformats-officedocument.presentationml.presentation;15747 application/rtf;13702 text/x-php;10417 text/x-c;9602 application/vnd.ms-powerpoint;7120 application/pgp-encrypted;6645 application/gzip;6166 application/x-dbf;5875 image/pjpeg;5259 application/mbox;4906 application/x-msdownload;4750 application/vnd.android.package-archive;4479 image/jpg;4207 text/x-java;4185 text/x-diff;4031 application/cdfv2;3936 text/vcard;3887 audio/x-wav;3791 text/troff;3494 image/x-ms-bmp;3490 application/x-sharedlib;3334 application/x-pkcs7-mime;3193 application/x-7z-compressed;3086 application/pkcs7-mime;2970 application/json;2878 application/vnd.openxmlformats-officedocument.presentationml.slideshow;2877 application/vnd.ms-excel.sheet.macroenabled.12;2682 application/x-rar;2509 text/x-asm;2466 application/x-excel;2448 text/x-shellscript;2348 application/x-msdos-program;2244 text/x-c++;2130 text/x-python;2035 application/vnd.ms-office;1881 message/delivery-status;1697 audio/mpeg;1598 application/postscript;1563 application/x-shockwave-flash;1527 application/vnd.oasis.opendocument.text;1491 application/x-object;1403 image/x-icon;1309 image/svg+xml;1296 application/font-sfnt;1265 application/x-executable;1219 video/mp4;1211 image/x-png;1123 application/x-mach-binary;1053 application/ics;978 application/csv;950 application/x-pkcs12;931 application/x-sqlite3;931 application/x-rar-compressed;919 image/bmp;868 text/x-ruby;853 application/x-ms-wmz;841 text/x-makefile;841 application/x-tar;829 application/vnd.ms-excel.sheet.binary.macroenabled.12;797 video/quicktime;774 application/vnd.ms-office.vbaproject;767 application/vnd.ms-fontobject;752 text/x-pascal;736 image/x-tga;733 application/x-pkcs7-signature;711 application/vnd.oasis.opendocument.spreadsheet;690 application/vnd.ms-cab-compressed;685 text/x-ruby-script;652 text/x-yaml;634 image/vnd.dwg;620 text/x-vcard;598 image/x-icns;584 application-octet/stream;555 application/xml;518 application/x-font-ttf;455 unknown/unknown;452 application/java-archive;445 text/x-msdos-batch;401 application/x-msword;397 application/msexcel;378 application/x-dmp;373 image/pdf;361 text/x-perl;354 video/mpeg;349 application/pgp-keys;323 application/force-download;320 application/x-msaccess;313 application/vnd.openxmlformats-officedocument.wordprocessingml.template;294 text/x-objective-c;272 image/x-citrix-jpeg;267 application/vnd.ms-opentype;267 application/x-archive;260 application/x-arc;260 application/x-download;253 application/oleobject;248 image/x.djvu;240 application/vnd.tcpdump.pcap;228 text/x-log;228 image/svg;225 video/x-msvideo;224 application/zlib;218 image/x-pcx;216 text/directory;212 image/vnd.adobe.photoshop;212 image/x-photoshop;191 image/x-coreldraw;180 application/x-pdf;179 application/vnd.openxmlformats-officedocument.wordprocessingml.d;178 application/x-gzip-compressed;177 application/vnd.ms-word.document.macroenabled.12;175 application/epub+zip;171 application/encrypted;165 APPLICATION/PDF;164 application/x-x509-ca-cert;162 text/x-csrc;161 application/x-msexcel;160 video/x-ms-asf;154 application/x-wine-extension-ini;153 application/x-msi;152 application/x-compressed;148 video/x-ms-wmv;147 application/x-gzip;142 video/x-flv;134 application/x-setupscript;132 application/vnd.ms-project;132 text/javascript;131 application/x-extension-lic;129 text/css;129 application/x-ms-pdb;128 application/pgp-signature;125 application/vnd.oasis.opendocument.presentation;124 application/octetstream;121 application/pkcs7-signature;120 application/ms-tnef;118 audio/ogg;115 audio/x-m4a;114 application/x-java-keystore;113 audio/mp3;110 application/cdfv2-encrypted;109 application/vnd.ms-msi;108 image/x-eps;107 text/x-po;99 document/pdf;96 application/x-font-pfm;94 text/richtext;92 IMAGE/JPEG;90 image/jxr;89 text/x-sh;82 application/pgp;79 image/x-tiff;78 application/x-mspowerpoint;78 application/vnd.ms-word.document.12;77 application/vnd.ms-excel.sheet.macroEnabled.12;75 application/excel;75 application/x-ruby;74 application/x-bzip;73 application/vnd.apple.pkpass;73 application/vnd.visio;69 appliation/zip;69 application/x-pgp-keyring;67 application/x-tar-gz;66 application/x-compressed-tar;66 TEXT/HTML;64 text/pgp;60 text/x-vcalendar;60 audio/x-aiff;59 application/x-bittorrent;56 image/pcx;55 image/x-citrix-gif;52 application/x-ms-dos-executable;52 application/vnd.ms-xpsdocument;52 multipart/signed;50 image/jp2;49 application/applefile;49 audio/x-hx-aac-adts;48 application/download;48 image/x-xpmi;46 application/vnd.openxmlformats-officedocument.spreadsheetml.shee;45 audio/x-mpeg;42 application/winhelp;41 application/javascript;40 application/x-zip;40 image/psd;39 application/vnd.openxmlformats-officedocument.wordprocessingml.documen;39 image/tif;39 application/octet-streamn;39 video/x-matroska;37 image/x-xcf;37 application/x-sharing-metadata-xml;36 video/3gpp;35 application/binary;34 APPLICATION/OCTET-STREAM;33 text/x-changelog;32 application/vnd.adobe.air-application-installer-package;32 application/vnd.lotus-1-2-3;31 application/docx;31 image/g3fax;29 application/mixed;29 application/base64;28 audio/wav;28 application/x-file-download;27 text/x-chdr;27 x-unknown/octet-stream;27 application/x-tex-tfm;26 audio/amr;25 application/vnd.iccprofile;25 application/x-apple-msg-attachment;25 application/vnd.openxmlformats-officedocument.presentationml.pre;25 x-application/apple-pdf;24 message/news;24 application/x-javascript;24 application/x-dosdriver;24 binary/octet-stream;24 application/x-ms-excel;23 application/epub;23 application/x-webarchive;23 application/octet;22 IMAGE/TIFF;22 application/x-iwork-keynote-sffkey;21 application/x-unknown;21 image/*;20 application/vnd.ms-excel.12;19 file/pdf;19 image/vnd.microsoft.icon;19 application/vnd.ms-powerpoint.presentation.macroenabled.12;18 text/x-python-script;18 application/x-bzip2;18 application/x-shellscript;18 text/x-patch;18 video/x-fl;17 application/x-unknown-application-pdf;16 application/x-other-1;16 application/x-deb;16 application/x-apple-diskimage;16 x-epoc/x-sisx-app;16 application/x-unknown-application-octet-stream;16 application/vnd.ms-word;16 application/x-sql;15 audio/mp4;14 application/vnd.rim.cod;14 application/vnd.ms-publisher;14 application/x-force-download;13 application/vnd.rn-realmedia;13 file/rtf;13 application/vnd.fdf;13 application/x-ms-reader;13 image/eps;13 application/x-gnupg-keyring;12 image/vnd;12 chemical/x-cerius;12 video/x-m4v;12 APPLICATION/VND.MS-EXCEL;11 application/msaccess;11 video/avi;11 application/vnd.ms-officetheme;10 audio/x-zip;10 application/doc;10 video/msvideo;10 application/x-msmetafile;10 application/http-index-format;9 text/x-m4;9 application/onenote;9 application/x-ms-ese;9 application/vnd.openxmlformats-officedocument.wordprocessingml;9 application/vnd.openxmlformats-officedocume;9 image/x-win-bitmap;8 image/cur;8 text/x-moz-deleted;8 application/vnd.oasis.opendocument.graphics;8 application/smil;8 application/x-xpinstall;8 application/illustrator;7 text/x-c++src;7 application/msworks;7 application/vnd.ms-visio.drawing;7 text/x-tex;6 application/x-java-archive;6 application/x-rdp;6 application/x-arj;6 application/x-winhelp;6 IMAGE/PNG;6 text/pdf;6 application/macbinary;6 application/emz;6 audio/x-ms-wma;5 text/enriched;5 video/webm;5 application/x-dbt;5 application/x-dvi;5 application/x-compress;5 IMAGE/X-TIFF;5 IMAGE/PJPEG;5 application/x-php;4 application/x-gunzip;4 APPLICATION/MSWORD;4 application/xlsx;4 application/vnd.oasis.opendocument.formula;4 application/mspowerpoint;4 file/unknown;4 application/x-coredump;4 application/x-font-sfn;4 application/html;4 application/x-iwork-pages-sffpages;4 null/octet-stream;4 application/x-tgz;4 */*;4 video/ogg;4 audio/x-mp4a-latm;4 application/x-debian-package;4 application/vnd.swiftview-tif;4 application/x-perl;4 image/vnd.ms-modi;4 application/x-itunes-itlp;4 image/vnd.dgn;3 application/eml;3 chemical/x-ncbi-asn1-ascii;3 audio/mid;3 application/x-winhelp-fts;3 application/x-rpm;3 application/powerpoint;3 text/rfc822-headers;3 application/vnd.ms-powerpoint.presentation.12;3 audio/midi;3 text/multipart;3 application/x-emc;3 application/vnd.openxmlformats-officedocument.presentationml.template;3 text/x-fortran;3 invalid/pdf;3 application/x-pkcs7-certificates;2 application/vnd.ms-powerpoint.12;2 image/x-nikon-nef;2 application/vnd.ms-access;2 application/vndopenxmlformats-officedocumentspreadsheetmlsheet;2 image/jpf;2 application/vnd.pdf;2 content/unknown;2 chemical/x-galactic-spc;2 pdf/octet-stream;2 chemical/x-chemdraw;2 application/pkix-cert;2 x-unknown/stream;2 application/pages;2 application/vnd.symbian.install;2 text/x-awk;2 text/x-csh;2 application/x-any;2 application/msonenote;2 application/x-wine-extension-skp;2 application/x-appleworks3;2 application/mindmanager;2 application/jpg;2 application/jpeg;2 application/x-bzip-compressed-tar;2 application/x-rpt;2 application/haansoftdoc;2 text/x-vhdl;2 application/x-photoshop;2 application/x-ole-storage;2 application/x-mspublisher;2 application/vnd.ms-powerpoint.presentation.macroEnabled.12;2 audio/3gpp;1 application/x-java-jnlp-file;1 audio/ima4;1 audio/m4a;1 image/x-portable-pixmap;1 application/vnd.wordprocessing-openxml;1 type/multipart;1 text/text;1 x-unknown/pdf;1 APPLICATION/pdf;1 "text/html; charset=iso-8859-1";1 application/defanged-70628;1 application/x-omnigraffle;1 application/tiff;1 application/defanged-70626;1 video/vnd.dlna.mpeg-tts;1 application/unknown;1 application/vnd.ms-excel.sheet.12;1 application/cgi;1 text/calender;1 plain/text;1 application/vnd.ms-word.document.macroEnabled.12;1 model/vnd.mts;1 video/x-ms-wm;1 application/x-stuffit;1 application/vnd.iwork.pages.archive;1 application/x-iso9660-image;1 x-unknown/unknown;1 application/x-unknown-application-vnd.ms-excel;1 application/vnd.openxmlformats-officedocument.spreadsheetml.template;1 application/mime;1 text/x-lisp;1pantomime-0.6.1/contrib/play.py000066400000000000000000000007601445575457300165140ustar00rootroot00000000000000from pprint import pprint from collections import defaultdict import csv from pantomime import parse_mimetype data = defaultdict(int) with open('occrp.csv', 'r') as fh: reader = csv.reader(fh, delimiter=';') for row in reader: original, count = row parsed = parse_mimetype(original) print(parsed.label) # data[parsed.normalized] += int(count) # if parsed.normalized != original: # pprint((original, parsed.label)) print(len(data)) pantomime-0.6.1/pantomime/000077500000000000000000000000001445575457300155235ustar00rootroot00000000000000pantomime-0.6.1/pantomime/__init__.py000066400000000000000000000007571445575457300176450ustar00rootroot00000000000000from pantomime.parse import MIMEType from pantomime.types import DEFAULT, PLAIN from pantomime.mime import parse_mimetype, normalize_mimetype from pantomime.mime import useful_mimetype from pantomime.filename import FileName from pantomime.filename import normalize_extension, mimetype_extension __all__ = [ "MIMEType", "FileName", "DEFAULT", "PLAIN", "parse_mimetype", "normalize_mimetype", "useful_mimetype", "normalize_extension", "mimetype_extension", ] pantomime-0.6.1/pantomime/filename.py000066400000000000000000000034621445575457300176620ustar00rootroot00000000000000import os from typing import Any, Optional from banal import decode_path from mimetypes import guess_extension from normality import slugify, safe_filename from pantomime.mime import normalize_mimetype from pantomime.types import DEFAULT def normalize_extension(extension: Optional[str]) -> Optional[str]: """Normalise a file name extension.""" extension = decode_path(extension) if extension is None: return None if extension.startswith("."): extension = extension[1:] if "." in extension: _, extension = os.path.splitext(extension) extension = slugify(extension, sep="") if extension is None or not len(extension): return None return extension def mimetype_extension(mime_type: Optional[str]) -> Optional[str]: """Infer a possible extension from a MIME type.""" mime_type = normalize_mimetype(mime_type) if mime_type == DEFAULT: return None extension = guess_extension(mime_type) return normalize_extension(extension) class FileName(object): FALLBACK = "data" def __init__(self, file_name: Optional[str]): self.file_name = file_name self.base: Optional[str] = None self.extension: Optional[str] = None if file_name is not None: self.base, ext = os.path.splitext(file_name) self.extension = normalize_extension(ext) self.has_extension = self.extension is not None def safe(self, extension: Optional[str] = None) -> Optional[str]: ext = extension or self.extension default = "data.%s" % ext if ext else self.FALLBACK return safe_filename(self.file_name, default=default, extension=ext) def __str__(self) -> str: return self.file_name or self.FALLBACK def __repr__(self) -> str: return " MIMEType: """Parse a MIME type into a structured object.""" return MIMEType.parse(text, default=default) def normalize_mimetype(text: Optional[str], default: str = DEFAULT) -> str: """Normalize the spelling of a MIME type.""" return parse_mimetype(text, default=default).normalized or default def useful_mimetype(text: Optional[str]) -> bool: """Check to see if the given mime type is a MIME type which is useful in terms of how to treat this file. """ mimetype = normalize_mimetype(text) return mimetype not in [DEFAULT, PLAIN, None] pantomime-0.6.1/pantomime/parse.py000066400000000000000000000053301445575457300172100ustar00rootroot00000000000000from email.message import EmailMessage from typing import Any, Dict, Optional, Tuple from normality import stringify from normality.encoding import tidy_encoding from pantomime.types import DEFAULT, LABELS from pantomime.mappings import REPLACE class MIMEType(object): __slots__ = ["family", "subtype", "params", "name", "normalized"] SEP = "/" def __init__( self, family: Optional[str], subtype: Optional[str], params: Optional[Dict[str, str]] = None, ): self.family = family self.subtype = subtype self.name: Optional[str] = None if self.family is not None and self.subtype is not None: self.name = self.SEP.join((self.family, self.subtype)) self.normalized: Optional[str] = self.name if self.name in REPLACE: self.normalized = REPLACE.get(self.name, self.name) self.params: Dict[str, str] = params or {} @property def label(self) -> Optional[str]: if self.normalized in LABELS: return LABELS.get(self.normalized, self.normalized) if self.subtype is not None: label = self.subtype.lstrip("x") label = label.replace("-", " ") label = label.replace(".", " ") return label.strip() return None @property def charset(self) -> Optional[str]: charset = self.params.get("charset") if charset is None: return None return tidy_encoding(charset) @classmethod def split(cls, mime_type: Optional[str]) -> Tuple[Optional[str], Optional[str]]: if mime_type is None or cls.SEP not in mime_type: return None, None family, subtype = (p.strip() for p in mime_type.split(cls.SEP, 1)) if len(family) == 0 or len(subtype) == 0: return None, None return family.lower(), subtype.lower() @classmethod def parse( cls, mime_type: Optional[str], default: Optional[str] = None ) -> "MIMEType": mime_type = stringify(mime_type) params = None if mime_type is not None: msg = EmailMessage() msg['content-type'] = mime_type mime_type = msg.get_content_type() if mime_type.count("/") == 1 else None params = msg['content-type'].params family, subtype = cls.split(mime_type) if family is None: family, subtype = cls.split(default) return cls(family, subtype, params=params) def __eq__(self, other: Any) -> bool: return str(self) == str(other) def __hash__(self) -> int: return hash(str(self)) def __str__(self) -> str: return self.name or DEFAULT def __repr__(self) -> str: return str(self) pantomime-0.6.1/pantomime/py.typed000066400000000000000000000000001445575457300172100ustar00rootroot00000000000000pantomime-0.6.1/pantomime/types.py000066400000000000000000000043251445575457300172450ustar00rootroot00000000000000from pantomime.util import gettext DEFAULT = "application/octet-stream" DIRECTORY = "inode/directory" EMPTY = "inode/x-empty" PLAIN = "text/plain" PDF = "application/pdf" EXCEL = "application/vnd.ms-excel" XLS = EXCEL XLSX = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" WORD = "application/vnd.ms-word" DOCX = "application/vnd.openxmlformats-officedocument.wordprocessing" CSV = "text/csv" RTF = "text/rtf" XML = "application/xml" PSD = "image/vnd.adobe.photoshop" RAR = "application/rar" ZIP = "application/zip" GZIP = "application/gzip" RFC822 = "message/rfc822" HTML = "text/html" JPEG = "image/jpeg" PNG = "image/png" GIF = "image/gif" TIFF = "image/tiff" DJVU = "image/x.djvu" OPF = "application/xml+opfmessage" OUTLOOK = "application/vnd.ms-outlook" ZIP = "application/zip" JSON = "application/json" FTM = "application/json+ftm" FTM_RSLV = "application/json+ftm-rslv" FTM_STMT = "application/json+ftm-statements" MIXED = "multipart/mixed" ALTERNATIVE = "multipart/alternative" RELATED = "multipart/related" LABELS = { DEFAULT: gettext("Unknown file type"), DIRECTORY: gettext("Directory"), EMPTY: gettext("Empty file"), PLAIN: gettext("Plain text"), PDF: gettext("Portable Document Format"), EXCEL: gettext("Microsoft Excel"), XLSX: gettext("Microsoft Excel 2002+"), WORD: gettext("Microsoft Word"), DOCX: gettext("Microsoft Word 2002+"), CSV: gettext("Comma-separated table"), RTF: gettext("Rich text"), PSD: gettext("Adobe Photoshop"), RAR: gettext("WinRAR archive"), ZIP: gettext("Zip archive"), GZIP: gettext("GZip archive"), RFC822: gettext("Plain E-Mail"), HTML: gettext("HTML Web Page"), JPEG: gettext("JPEG Image"), TIFF: gettext("Tagged Image File Format"), DJVU: gettext("DejaVu E-Book"), PNG: gettext("Portable Network Graphics"), GIF: gettext("Graphics Interchange Format"), OPF: gettext("Microsoft Outlook for Mac E-Mail"), OUTLOOK: gettext("Microsoft Outlook E-Mail"), JSON: gettext("JavaScript Object Notation"), XML: gettext("eXtensible Markup Language"), FTM: gettext("FollowTheMoney Entities"), FTM_RSLV: gettext("FollowTheMoney Integrated"), FTM_STMT: gettext("FollowTheMoney Statements"), } pantomime-0.6.1/pantomime/util.py000066400000000000000000000000571445575457300170540ustar00rootroot00000000000000def gettext(text: str) -> str: return text pantomime-0.6.1/setup.cfg000066400000000000000000000000311445575457300153450ustar00rootroot00000000000000[bdist_wheel] universal=1pantomime-0.6.1/setup.py000066400000000000000000000026611445575457300152510ustar00rootroot00000000000000from setuptools import setup, find_packages with open("README.md") as f: long_description = f.read() setup( name="pantomime", version="0.6.1", description="MIME type normalisation and labels.", long_description=long_description, long_description_content_type="text/markdown", classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.6", ], keywords="mime mimetypes file types", author="Journalism Development Network, Inc.", author_email="data@occrp.org", url="http://github.com/alephdata/pantomime", license="MIT", packages=find_packages(exclude=["ez_setup", "examples", "test"]), namespace_packages=[], package_data={"pantomime": ["py.typed"]}, include_package_data=True, zip_safe=False, install_requires=[ "banal >= 0.3.5", "normality >= 0.5.6", ], tests_require=["nose"], entry_points={}, extras_require={ "dev": [ "wheel>=0.29.0", "twine", "mypy", "black", "flake8>=2.6.0", "pytest", "pytest-cov", "banal", "coverage>=4.1", ] }, ) pantomime-0.6.1/tests/000077500000000000000000000000001445575457300146745ustar00rootroot00000000000000pantomime-0.6.1/tests/__init__.py000066400000000000000000000000001445575457300167730ustar00rootroot00000000000000pantomime-0.6.1/tests/test_extension.py000066400000000000000000000022521445575457300203220ustar00rootroot00000000000000import unittest from pantomime.filename import normalize_extension from pantomime.filename import mimetype_extension as mime_ext class ExtensionTest(unittest.TestCase): def test_normalize(self): self.assertEqual(normalize_extension(".doc"), "doc") self.assertEqual(normalize_extension(None), None) self.assertEqual(normalize_extension(""), None) self.assertEqual(normalize_extension("bla.doc"), "doc") self.assertEqual(normalize_extension("bla.DOC"), "doc") self.assertEqual(normalize_extension("bla.DO C"), "doc") self.assertEqual(normalize_extension("bla. DOC "), "doc") self.assertEqual(normalize_extension("TXT"), "txt") self.assertEqual(normalize_extension(".TXT"), "txt") self.assertEqual(normalize_extension("foo.txt"), "txt") self.assertEqual(normalize_extension("foo..TXT"), "txt") self.assertEqual(normalize_extension(".HTM,L"), "html") def test_mimetype_extension(self): self.assertEqual(mime_ext(None), None) self.assertEqual(mime_ext(""), None) self.assertEqual(mime_ext("bla"), None) self.assertEqual(mime_ext("application/pdf"), "pdf") pantomime-0.6.1/tests/test_filename.py000066400000000000000000000016471445575457300200750ustar00rootroot00000000000000import unittest from pantomime.filename import FileName class FileNameTest(unittest.TestCase): def test_none_filename(self): fn = FileName(None) self.assertEqual(fn.file_name, None) self.assertEqual(fn.extension, None) self.assertFalse(fn.has_extension) self.assertEqual(fn.safe(), "data") def test_normal_filename(self): fn = FileName("testing .doc") self.assertEqual(fn.file_name, "testing .doc") self.assertEqual(fn.extension, "doc") self.assertTrue(fn.has_extension) self.assertEqual(fn.safe(), "testing.doc") self.assertEqual(fn.safe("xls"), "testing.xls") def test_no_ext_filename(self): fn = FileName("testing xxx") self.assertEqual(fn.extension, None) self.assertFalse(fn.has_extension) self.assertEqual(fn.safe(), "testing_xxx") self.assertEqual(fn.safe("doc"), "testing_xxx.doc") pantomime-0.6.1/tests/test_mime.py000066400000000000000000000034211445575457300172340ustar00rootroot00000000000000import unittest from pantomime import parse_mimetype, normalize_mimetype, DEFAULT from pantomime import useful_mimetype class MIMETest(unittest.TestCase): def test_normalize(self): self.assertEqual(normalize_mimetype("TEXT/ PLAIN"), "text/plain") self.assertEqual(normalize_mimetype("TEXT/"), DEFAULT) self.assertEqual(normalize_mimetype("1"), DEFAULT) self.assertEqual(normalize_mimetype("1", default=None), None) self.assertEqual(normalize_mimetype(None), DEFAULT) PST = "application/VND.ms-outlook" self.assertEqual(normalize_mimetype(PST), PST.lower()) def test_useful(self): self.assertFalse(useful_mimetype(None)) self.assertFalse(useful_mimetype(DEFAULT)) self.assertTrue(useful_mimetype("image/png")) def test_label(self): parsed = parse_mimetype("application/x-pudo-banana") self.assertEqual(parsed.label, "pudo banana") def test_parse(self): parsed = parse_mimetype("text/plain") self.assertEqual(parsed.charset, None) self.assertEqual(parsed.label, "Plain text") self.assertEqual(parsed.family, "text") self.assertEqual(parsed.subtype, "plain") self.assertEqual(parsed.normalized, "text/plain") self.assertEqual("%s" % parsed, "text/plain") self.assertEqual("%r" % parsed, "text/plain") parsed = parse_mimetype("text/plain; charset=cp1251") self.assertEqual(parsed.charset, "cp1251") parsed = parse_mimetype("text/plain; charset=banana") self.assertEqual(parsed.charset, "utf-8") self.assertEqual(parsed, parse_mimetype("text/plain")) def test_parse_rewrite(self): parsed = parse_mimetype("plain/text") self.assertEqual(parsed.normalized, "text/plain")