hachoir-metadata-1.3.3/0000755000175000017500000000000011423160161013726 5ustar haypohaypohachoir-metadata-1.3.3/hachoir-metadata0000755000175000017500000001453511251277274017074 0ustar haypohaypo#!/usr/bin/python import sys try: from hachoir_core.error import error, HachoirError from hachoir_core.cmd_line import unicodeFilename from hachoir_core.i18n import getTerminalCharset, _ from hachoir_core.benchmark import Benchmark from hachoir_core.stream import InputStreamError from hachoir_core.tools import makePrintable from hachoir_parser import createParser, ParserList import hachoir_core.config as hachoir_config from hachoir_metadata import config except ImportError, err: raise print >>sys.stderr, "Unable to import an Hachoir module: %s" % err sys.exit(1) from optparse import OptionGroup, OptionParser from hachoir_metadata import extractMetadata from hachoir_metadata.metadata import extractors as metadata_extractors def displayParserList(*args): parser_list = ParserList() for parser in metadata_extractors.keys(): parser_list.add(parser) parser_list.print_(_("List of metadata extractors.")) sys.exit(0) def displayVersion(*args): import hachoir_core from hachoir_metadata import __version__ print _("Metadata extractor version %s") % __version__ print _("Hachoir library version %s") % hachoir_core.__version__ print print _("Website: %s/wiki/hachoir-metadata") % hachoir_core.WEBSITE sys.exit(0) def parseOptions(): parser = OptionParser(usage="%prog [options] files") parser.add_option("--type", help=_("Only display file type (description)"), action="store_true", default=False) parser.add_option("--mime", help=_("Only display MIME type"), action="store_true", default=False) parser.add_option("--level", help=_("Quantity of information to display from 1 to 9 (9 is the maximum)"), action="store", default="9", type="choice", choices=[ str(choice) for choice in xrange(1,9+1) ]) parser.add_option("--raw", help=_("Raw output"), action="store_true", default=False) 
parser.add_option("--bench", help=_("Run benchmark"), action="store_true", default=False) parser.add_option("--force-parser",help=_("List all parsers then exit"), type="str") parser.add_option("--parser-list",help=_("List all parsers then exit"), action="callback", callback=displayParserList) parser.add_option("--profiler", help=_("Run profiler"), action="store_true", default=False) parser.add_option("--version", help=_("Display version and exit"), action="callback", callback=displayVersion) parser.add_option("--quality", help=_("Information quality (0.0=fastest, 1.0=best, and default is 0.5)"), action="store", type="float", default="0.5") parser.add_option("--maxlen", help=_("Maximum string length in characters, 0 means unlimited (default: %s)" % config.MAX_STR_LENGTH), type="int", default=config.MAX_STR_LENGTH) parser.add_option("--verbose", help=_("Verbose mode"), default=False, action="store_true") parser.add_option("--debug", help=_("Debug mode"), default=False, action="store_true") values, filename = parser.parse_args() if len(filename) == 0: parser.print_help() sys.exit(1) # Update limits config.MAX_STR_LENGTH = values.maxlen if values.raw: config.RAW_OUTPUT = True return values, filename def processFile(values, filename, display_filename=False, priority=None, human=True, display=True): charset = getTerminalCharset() filename, real_filename = unicodeFilename(filename, charset), filename # Create parser try: if values.force_parser: tags = [ ("id", values.force_parser), None ] else: tags = None parser = createParser(filename, real_filename=real_filename, tags=tags) except InputStreamError, err: error(unicode(err)) return False if not parser: error(_("Unable to parse file: %s") % filename) return False # Extract metadata extract_metadata = not(values.mime or values.type) if extract_metadata: try: metadata = extractMetadata(parser, values.quality) except HachoirError, err: error(unicode(err)) metadata = None if not metadata: parser.error(_("Hachoir can't extract 
metadata, but is able to parse: %s") % filename) return False if display: # Display metadatas on stdout if extract_metadata: text = metadata.exportPlaintext(priority=priority, human=human) if not text: text = [_("(no metadata, priority may be too small)")] if display_filename: for line in text: line = "%s: %s" % (filename, line) print makePrintable(line, charset) else: for line in text: print makePrintable(line, charset) else: if values.type: text = parser.description else: text = parser.mime_type if display_filename: text = "%s: %s" % (filename, text) print text return True def processFiles(values, filenames, display=True): human = not(values.raw) ok = True priority = int(values.level)*100 + 99 display_filename = (1 < len(filenames)) for filename in filenames: ok &= processFile(values, filename, display_filename, priority, human, display) return ok def benchmarkMetadata(values, filenames): bench = Benchmark() bench.run(processFiles, values, filenames, display=False) def profile(values, filenames): from hachoir_core.profiler import runProfiler return runProfiler(processFiles, (values, filenames), {'display': False}) def main(): try: # Parser options and initialize Hachoir values, filenames = parseOptions() if values.debug: hachoir_config.debug = True elif values.verbose: hachoir_config.verbose = True else: hachoir_config.quiet = True if values.profiler: ok = profile(values, filenames) elif values.bench: ok = benchmarkMetadata(values, filenames) else: ok = processFiles(values, filenames) except KeyboardInterrupt: print _("Program interrupted (CTRL+C).") ok = False sys.exit(int(not ok)) if __name__ == "__main__": main() hachoir-metadata-1.3.3/MANIFEST.in0000644000175000017500000000051211332531025015462 0ustar haypohaypoinclude AUTHORS include ChangeLog include COPYING include gnome/hachoir include gnome/README include hachoir_metadata/qt/dialog.ui include INSTALL include kde/hachoir.desktop include kde/hachoir-metadata-kde include kde/README include MANIFEST.in 
include metadata_csv.py include README include run_testcase.py include test_doc.py hachoir-metadata-1.3.3/hachoir-metadata-gtk0000755000175000017500000000702411251277274017652 0ustar haypohaypo#!/usr/bin/python import sys, pygtk, os pygtk.require('2.0') import gtk from hachoir_core.cmd_line import unicodeFilename from hachoir_parser import createParser from hachoir_metadata import extractMetadata from hachoir_metadata.metadata import MultipleMetadata class Gui: def __init__(self): self.main_window = gtk.Window(gtk.WINDOW_TOPLEVEL) self.main_window.set_border_width(5) self.main_window.connect("destroy", self._destroy) self.main_vbox = gtk.VBox() self.select_hbox = gtk.HBox() self.select_button = gtk.Button("Select") self.select_button.connect("clicked", self._select_clicked) self.select_hbox.pack_start(self.select_button, False) self.file_combo = gtk.combo_box_new_text() self.file_combo.connect("changed", self._file_combo_changed) self.select_hbox.pack_start(self.file_combo) self.main_vbox.pack_start(self.select_hbox, False) self.metadata_table = gtk.Table(1, 1) self.metadata_table.attach(gtk.Label("Select a file to view metadata information..."), 0, 1, 0, 1) self.main_vbox.pack_start(self.metadata_table) self.main_window.add(self.main_vbox) self.main_window.show_all() def add_file(self, filename): self.file_combo.append_text(filename) def _select_clicked(self, widget): file_chooser = gtk.FileChooserDialog("Ouvrir..", None, gtk.FILE_CHOOSER_ACTION_OPEN, (gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL, gtk.STOCK_OPEN, gtk.RESPONSE_OK)) file_chooser.set_default_response(gtk.RESPONSE_OK) file_chooser.show() reponse = file_chooser.run() if reponse == gtk.RESPONSE_OK: selected_file = file_chooser.get_filename() self.add_file(selected_file) file_chooser.destroy() def _file_combo_changed(self, widget): self.main_vbox.remove(self.metadata_table) real_filename = self.file_combo.get_active_text() filename = unicodeFilename(real_filename) parser = createParser(filename, 
real_filename=real_filename) metadata = extractMetadata(parser) self.metadata_table = gtk.Table(1, 2) self.main_vbox.pack_start(self.metadata_table) if metadata is None: self.metadata_table.attach(gtk.Label("Unknown file format"), 0, 1, 0, 1) else: total = 1 for data in sorted(metadata): if not data.values: continue title = data.description for item in data.values: self.metadata_table.resize(total, 2) value = item.text self.metadata_table.attach(gtk.Label(title + ":"), 0, 1, total-1, total) self.metadata_table.attach(gtk.Label(value), 1, 2, total-1, total) total += 1 self.metadata_table.show_all() def _destroy(self, widget, data=None): gtk.main_quit() def main(self): has_file = False for arg in sys.argv[1:]: if os.path.isdir(arg): for file in os.listdir(arg): path = os.path.join(arg, file) if os.path.isfile(path): self.add_file(path) has_file = True elif os.path.isfile(arg): self.add_file(arg) has_file = True if has_file: self.file_combo.set_active(0) gtk.main() if __name__ == "__main__": Gui().main() hachoir-metadata-1.3.3/run_testcase.py0000755000175000017500000004367311251277274017034 0ustar haypohaypo#!/usr/bin/env python2.4 # -*- coding: utf-8 -*- """ Test hachoir-metadata using the testcase. 
""" DOWNLOAD_SCRIPT = "download_testcase.py" # Configure Hachoir from hachoir_core import config config.use_i18n = False # Don't use i18n config.quiet = True # Don't display warnings from hachoir_core.i18n import getTerminalCharset from hachoir_core.error import HachoirError from hachoir_core.stream import InputStreamError from hachoir_parser import createParser from hachoir_core.compatibility import all from hachoir_core.language import Language from hachoir_metadata import extractMetadata from hachoir_metadata.timezone import createTimezone from datetime import date, timedelta, datetime from locale import setlocale, LC_ALL import os import sys def checkAttr(metadata, name, value): sys.stdout.write(" - Check metadata %s=%s: " % (name, repr(value))) if not isinstance(value, (list, tuple)): value = [value] # Has subgroup? (eg. "audio/sample_rate") if "/" in name: group, name = name.split("/", 1) if group not in metadata: sys.stdout.write("no group \"%s\"!\n" % group) return False metadata = metadata[group] # Has asked attribute? 
if not metadata.has(name): sys.stdout.write("no attribute \"%s\"!\n" % name) return False # Read value reads = metadata.getValues(name) # Check value if len(reads) != len(value): sys.stdout.write("wrong len (%s instead of %s)!\n" % (len(reads), len(value))) return False values = value for index, value in enumerate(values): read = reads[index] # Check type if type(read) != type(value) \ and not(isinstance(value, (int, long)) and isinstance(value, (int, long))): sys.stdout.write("wrong type (%s instead of %s)!\n" % (type(read).__name__, type(value).__name__)) return False # Check value if value != read: sys.stdout.write("wrong value %s (%r instead of %r)!\n" % (index, read, value)) return False sys.stdout.write("ok\n") return True def checkLogoUbuntuMeta(metadata): return ( checkAttr(metadata, "bits_per_pixel", 32), checkAttr(metadata, "creation_date", datetime(2006, 5, 26, 9, 41, 46)), checkAttr(metadata, "mime_type", u"image/png")) def checkClickMeta(metadata): return ( checkAttr(metadata, "producer", u"Sound Forge 4.5"), checkAttr(metadata, "creation_date", date(2001, 2, 21)), checkAttr(metadata, "duration", timedelta(microseconds=19546)), checkAttr(metadata, "bit_rate", 705600), checkAttr(metadata, "sample_rate", 22050)) def checkGzipMeta(metadata): return ( checkAttr(metadata, "file_size", 99), checkAttr(metadata, "compr_size", 90), checkAttr(metadata, "last_modification", datetime(2006, 7, 29, 12, 20, 44)), checkAttr(metadata, "os", u"Unix"), checkAttr(metadata, "compression", u"deflate")) def checkSheepMeta(metadata): return ( checkAttr(metadata, "format_version", u"MPEG version 1 layer III"), checkAttr(metadata, "author", u"Sheep On Drugs"), checkAttr(metadata, "comment", u"Stainless Steel Provider is compilated to the car of Twinstar.")) def checkPng331_90_8Meta(metadata): return ( checkAttr(metadata, "width", 331), checkAttr(metadata, "creation_date", datetime(2006, 5, 26, 9, 41, 46)), checkAttr(metadata, "mime_type", u"image/png"), checkAttr(metadata, 
"endian", u"Big endian")) def checkFlashMobInfo(metadata): return ( checkAttr(metadata, "copyright", u"© dadaprod, licence Creative Commons by-nc-sa 2.0 fr"), checkAttr(metadata, "video[1]/width", 384), checkAttr(metadata, "video[1]/language", Language('fre')), checkAttr(metadata, "duration", timedelta(seconds=17, milliseconds=844)), ) def check10min(meta): return ( checkAttr(meta, "duration", timedelta(minutes=10)), checkAttr(meta, "producer", [u"x264", u"Haali Matroska Writer b0"]), checkAttr(meta, "video[1]/width", 384), checkAttr(meta, "video[1]/height", 288), checkAttr(meta, "video[1]/compression", u"V_MPEG4/ISO/AVC"), ) def checkWormuxIco(meta): return ( checkAttr(meta, "image[0]/width", 16), checkAttr(meta, "image[0]/height", 16), checkAttr(meta, "image[0]/bits_per_pixel", 32), checkAttr(meta, "image[0]/compression", u"Uncompressed (RGB)"), ) def checkAudio8kHz(meta): return ( checkAttr(meta, "mime_type", u"audio/basic"), checkAttr(meta, "nb_channel", 1), checkAttr(meta, "bits_per_sample", 8), checkAttr(meta, "bit_rate", 64096), checkAttr(meta, "sample_rate", 8012), checkAttr(meta, "compression", u"8-bit ISDN u-law"), checkAttr(meta, "comment", u"../tmp/temp.snd"), checkAttr(meta, "duration", timedelta(seconds=4, microseconds=391538)), ) def checkCrossXCF(meta): return ( checkAttr(meta, "comment", u"Created with The GIMP"), checkAttr(meta, "width", 61), checkAttr(meta, "height", 72), checkAttr(meta, "compression", u"RLE"), checkAttr(meta, "mime_type", u"image/x-xcf")) def checkTARMeta(meta): return ( checkAttr(meta, "file[0]/filename", u"dummy.txt"), checkAttr(meta, "file[0]/file_size", 62), checkAttr(meta, "file[1]/file_attr", u"-rwxr-xr-x (755)"), checkAttr(meta, "file[1]/last_modification", datetime(2006, 10, 1, 13, 9, 3)), checkAttr(meta, "file[2]/file_type", u"Normal disk file"), ) def checkCornerBMPMeta(meta): return ( checkAttr(meta, "width", 189), checkAttr(meta, "nb_colors", 70), checkAttr(meta, "compression", u"Uncompressed"), checkAttr(meta, 
"mime_type", u"image/x-ms-bmp"), ) def checkSmallville(metadata): return ( checkAttr(metadata, "duration", timedelta(minutes=44, seconds=1, microseconds=141141)), checkAttr(metadata, "producer", u"VirtualDubMod 1.5.10.1 (build 2366/release)"), checkAttr(metadata, "video/width", 640), checkAttr(metadata, "video/height", 352), checkAttr(metadata, "video/compression", u'XviD MPEG-4 (fourcc:"xvid")'), checkAttr(metadata, "video/frame_rate", 23.976), checkAttr(metadata, "audio[1]/nb_channel", 2), checkAttr(metadata, "audio[1]/sample_rate", 48000), checkAttr(metadata, "audio[1]/compression", u"MPEG Layer 3")) def checkLechat(meta): return ( checkAttr(meta, "album", [u"Arte Radio", u"Chat Broodthaers"]), checkAttr(meta, "url", u"Liens direct ARTE Radio: www.arteradio.com/son.html?473"), checkAttr(meta, "creation_date", date(2003, 1, 1)), checkAttr(meta, "producer", u"www.arteradio.com"), checkAttr(meta, "sample_rate", 44100), checkAttr(meta, "bit_rate", 128000)) def checkJpegExifPSD(meta): return ( checkAttr(meta, "producer", [u"Adobe Photoshop 7.0"]), checkAttr(meta, "width", 124), checkAttr(meta, "compression", u"JPEG (Progressive)"), checkAttr(meta, "creation_date", datetime(2006, 6, 28, 14, 51, 9))) def checkInterludeDavid(meta): return ( checkAttr(meta, "title", u"interlude symbiosys1"), checkAttr(meta, "artist", u"david aubrun"), checkAttr(meta, "duration", timedelta(minutes=1, seconds=12, microseconds=19592)), checkAttr(meta, "audio[1]/nb_channel", 2), checkAttr(meta, "audio[1]/format_version", u"Vorbis version 0"), checkAttr(meta, "audio[1]/sample_rate", 44100), checkAttr(meta, "mime_type", u"audio/vorbis"), ) def checkBreakdance(meta): return ( checkAttr(meta, "audio/sample_rate", 22050), checkAttr(meta, "duration", timedelta(seconds=46, milliseconds=942)), checkAttr(meta, "producer", [u"YouTube, Inc.", u"YouTube Metadata Injector."]), ) def checkMatrixPingPong(meta): return ( checkAttr(meta, "title", u"欽ちゃん&香取慎吾の全日本仮装大賞"), checkAttr(meta, "duration", 
timedelta(minutes=1, seconds=47, milliseconds=258)), checkAttr(meta, "creation_date", datetime(2003, 6, 16, 7, 57, 23, 235000)), checkAttr(meta, "audio[1]/sample_rate", 8000), checkAttr(meta, "audio[1]/bits_per_sample", 16), checkAttr(meta, "audio[1]/compression", u"Windows Media Audio V7 / V8 / V9"), checkAttr(meta, "video[1]/width", 200), checkAttr(meta, "video[1]/height", 150), checkAttr(meta, "video[1]/bits_per_pixel", 24), ) def checkUSARailroad(meta): return ( # Check IPTC parser checkAttr(meta, "author", u"Ian Britton"), checkAttr(meta, "copyright", u"FreeFoto.com"), ) def checkHero(meta): return ( checkAttr(meta, "width", 320), checkAttr(meta, "bits_per_pixel", 8), checkAttr(meta, "nb_colors", 256), checkAttr(meta, "compression", u"8-bit uncompressed"), ) def check25min(meta): return ( checkAttr(meta, "duration", timedelta(minutes=25, seconds=33)), checkAttr(meta, "nb_channel", 2), checkAttr(meta, "sample_rate", 44100), checkAttr(meta, "bit_rate", 1411200), checkAttr(meta, "bits_per_sample", 16), checkAttr(meta, "compression", u"Little-endian, no compression"), ) def checkLadouce(meta): return ( checkAttr(meta, "duration", timedelta(hours=1, minutes=16, seconds=32, microseconds=516032)), checkAttr(meta, "nb_channel", 6), checkAttr(meta, "sample_rate", 44100), checkAttr(meta, "bits_per_sample", 32), checkAttr(meta, "compression", u"IEEE Float"), checkAttr(meta, "bit_rate", 8467200), ) def checkLaraCroft(meta): return ( checkAttr(meta, "width", 320), checkAttr(meta, "nb_colors", 256), checkAttr(meta, "compression", u"Run-length encoding (RLE)"), ) def checkHachoirOrgSXW(meta): return ( checkAttr(meta, "mime_type", u"application/vnd.sun.xml.writer"), checkAttr(meta, "file[0]/file_size", 30), checkAttr(meta, "file[1]/creation_date", datetime(2007, 1, 22, 19, 8, 14)), checkAttr(meta, "file[2]/filename", u"Configurations2/accelerator/current.xml"), checkAttr(meta, "file[2]/compression", u"Deflate"), ) def checkFirstRun(meta): return ( checkAttr(meta, "duration", 
timedelta(seconds=17, milliseconds=66)), checkAttr(meta, "creation_date", datetime(2000, 6, 14, 10, 3, 18)), checkAttr(meta, "copyright", u"©2000 RealNetworks"), checkAttr(meta, "producer", u"RealProducer Plus 6.1.0.153 Windows"), checkAttr(meta, "stream[0]/mime_type", u"audio/x-pn-realaudio"), checkAttr(meta, "stream[0]/bit_rate", 32148), checkAttr(meta, "stream[0]/title", u"Audio Stream"), checkAttr(meta, "mime_type", u"audio/x-pn-realaudio"), checkAttr(meta, "bit_rate", 32348), checkAttr(meta, "stream[1]/bit_rate", 200), ) def checkDejaVu(meta): return ( checkAttr(meta, "title", u"DejaVu Serif"), checkAttr(meta, "author", u"DejaVu fonts team"), checkAttr(meta, "version", u"2.7"), checkAttr(meta, "creation_date", datetime(2006, 7, 6, 17, 29, 52)), checkAttr(meta, "last_modification", datetime(2006, 7, 6, 17, 29, 52)), checkAttr(meta, "copyright", [ u"Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved.\nDejaVu changes are in public domain", u"http://dejavu.sourceforge.net/wiki/index.php/License"]), checkAttr(meta, "url", u"http://dejavu.sourceforge.net"), checkAttr(meta, "comment", [ u"Smallest readable size in pixels: 8 pixels", u"Font direction: Mixed directional"]), ) def checkTwunk16(meta): return ( checkAttr(meta, "title", [ u"Twain_32.dll Client's 16-Bit Thunking Server", u"Twain Thunker"]), checkAttr(meta, "author", u"Twain Working Group"), checkAttr(meta, "version", u"1,7,0,0"), checkAttr(meta, "format_version", u"New-style executable: Dynamic-link library (DLL)"), ) def checkDebianTorrent(meta): return ( checkAttr(meta, "filename", u"debian-31r4-i386-binary-1.iso"), checkAttr(meta, "url", u"http://bttracker.acc.umu.se:6969/announce"), checkAttr(meta, "file_size", 669775872), checkAttr(meta, "creation_date", datetime(2006, 11, 16, 21, 44, 37)), ) def checkGreenFire(meta): return ( checkAttr(meta, 'height', 64), checkAttr(meta, 'bits_per_pixel', 32), checkAttr(meta, 'comment', (u"Intel(R) JPEG Library, version 1,5,4,36", u"JPEG quality: 80%")), ) def 
checkMarcKravetz(meta): return ( checkAttr(meta, 'creation_date', datetime(2007, 7, 19, 9, 3, 57, tzinfo=createTimezone(2))), checkAttr(meta, 'sample_rate', 48000), checkAttr(meta, 'compr_rate', 12.0), checkAttr(meta, 'album', u"France Culture - Le portrait du jour par Marc Kravetz"), checkAttr(meta, 'author', u"Marc Kravetz"), checkAttr(meta, 'duration', timedelta(0, 2, 400000)), checkAttr(meta, 'bit_rate', 128000), checkAttr(meta, 'track_number', 32), checkAttr(meta, 'bits_per_sample', 16), checkAttr(meta, 'copyright', u"Radio France"), checkAttr(meta, 'format_version', u"MPEG version 1 layer III"), ) def checkPentax320(meta): return ( checkAttr(meta, 'width', 320), checkAttr(meta, 'height', 240), checkAttr(meta, 'duration', timedelta(0, 4, 966667)), checkAttr(meta, 'creation_date', datetime(2005, 8, 11, 14, 3, 54)), checkAttr(meta, 'last_modification', datetime(2005, 8, 11, 14, 3, 54)), ) def checkGPS(meta): return ( checkAttr(meta, 'altitude', 78.0), checkAttr(meta, 'creation_date', datetime(2003, 5, 24, 22, 29, 14)), checkAttr(meta, 'latitude', 35.616019444444447), checkAttr(meta, 'longitude', 139.69731666666667), checkAttr(meta, 'camera_model', u'A5301T'), checkAttr(meta, 'camera_manufacturer', u'KDDI-TS'), ) def checkAngelBear(meta): return ( checkAttr(meta, 'title', u"Angel Bear"), checkAttr(meta, 'artist', u"Copyright ©Loraine Wauer-Ferus http://www.billybear4kids.com"), checkAttr(meta, 'frame_rate', 4.0), ) def checkHotelCalifornia(meta): return ( checkAttr(meta, 'title', u"Hotel California"), checkAttr(meta, 'artist', u"The Eagles"), checkAttr(meta, 'duration', timedelta(seconds=51, microseconds=512834)), checkAttr(meta, 'nb_channel', 2), checkAttr(meta, 'sample_rate', 44100), checkAttr(meta, 'bits_per_sample', 16), checkAttr(meta, 'producer', u'reference libFLAC 1.1.2 20050205'), ) def checkRadpoor(meta): return ( checkAttr(meta, 'title', u"\u062a\u0633\u062a"), checkAttr(meta, 'author', u'Soroosh Radpoor'), checkAttr(meta, 'creation_date', 
datetime(2008, 9, 2, 16, 8, 30)), ) def checkQuicktime(meta): return ( checkAttr(meta, 'width', 190), checkAttr(meta, 'height', 240), checkAttr(meta, 'creation_date', datetime(2005, 10, 28, 17, 46, 46)), checkAttr(meta, 'mime_type', u'video/mp4'), ) def checkFile(filename, check_metadata, quality=1.0): sys.stdout.write(" - Create parser: ") sys.stdout.flush() try: parser = createParser(filename) except InputStreamError, err: sys.stdout.write("stream error! %s\n" % unicode(err)) sys.exit(1) if not parser: sys.stdout.write("unable to create parser\n") return False sys.stdout.write("ok\n") sys.stdout.write(" - Create metadata: ") sys.stdout.flush() try: metadata = extractMetadata(parser, quality) except HachoirError, err: sys.stdout.write("stream error! %s\n" % unicode(err)) sys.exit(1) if not metadata: sys.stdout.write("unable to create parser\n") return False sys.stdout.write("ok\n") return all(check_metadata(metadata)) def testFiles(directory): if not os.path.exists(directory): try: os.mkdir(directory) except OSError: print "Unable to create directory: %s" % directory return False for filename, check_metadata in testcase_files: fullname = os.path.join(directory, filename) try: os.stat(fullname) except OSError: print >>sys.stderr, \ "[!] Error: file %s is missing, " \ "use script %s to fix your testcase" \ % (filename, DOWNLOAD_SCRIPT) return False print "[+] Test %s:" % filename if not checkFile(fullname, check_metadata): return False return True def main(): setlocale(LC_ALL, "C") if len(sys.argv) != 2: print >>sys.stderr, "usage: %s testcase_directory" % sys.argv[0] sys.exit(1) charset = getTerminalCharset() directory = unicode(sys.argv[1], charset) print "Test hachoir-metadata using testcase." print print "Testcase is in directory: %s" % directory ok = testFiles(directory) if ok: print print "Result: ok for the %s files" % len(testcase_files) sys.exit(0) else: print for index in xrange(3): print "!!! ERROR !!!" 
print sys.exit(1) testcase_files = ( (u"logo-kubuntu.png", checkLogoUbuntuMeta), (u"kde_click.wav", checkClickMeta), (u"test.txt.gz", checkGzipMeta), (u"flashmob.mkv", checkFlashMobInfo), (u"10min.mkv", check10min), (u"wormux_32x32_16c.ico", checkWormuxIco), (u"audio_8khz_8bit_ulaw_4s39.au", checkAudio8kHz), (u"sheep_on_drugs.mp3", checkSheepMeta), (u"cross.xcf", checkCrossXCF), (u"small_text.tar", checkTARMeta), (u"kde_haypo_corner.bmp", checkCornerBMPMeta), (u"png_331x90x8_truncated.png", checkPng331_90_8Meta), (u"smallville.s03e02.avi", checkSmallville), (u"08lechat_hq_fr.mp3", checkLechat), (u"jpeg.exif.photoshop.jpg", checkJpegExifPSD), (u"interlude_david_aubrun.ogg", checkInterludeDavid), (u"breakdance.flv", checkBreakdance), (u"matrix_ping_pong.wmv", checkMatrixPingPong), (u"usa_railroad.jpg", checkUSARailroad), (u"hero.tga", checkHero), (u"25min.aifc", check25min), (u"ladouce_1h15.wav", checkLadouce), (u"lara_croft.pcx", checkLaraCroft), (u"hachoir.org.sxw", checkHachoirOrgSXW), (u"firstrun.rm", checkFirstRun), (u"deja_vu_serif-2.7.ttf", checkDejaVu), (u"twunk_16.exe", checkTwunk16), (u"debian-31r4-i386-binary-1.iso.torrent", checkDebianTorrent), (u"green_fire.jpg", checkGreenFire), (u"marc_kravetz.mp3", checkMarcKravetz), (u"pentax_320x240.mov", checkPentax320), (u"gps.jpg", checkGPS), (u"angle-bear-48x48.ani", checkAngelBear), (u"hotel_california.flac", checkHotelCalifornia), (u"radpoor.doc", checkRadpoor), (u"quicktime.mp4", checkQuicktime), ) if __name__ == "__main__": main() hachoir-metadata-1.3.3/hachoir_metadata/0000755000175000017500000000000011423160161017203 5ustar haypohaypohachoir-metadata-1.3.3/hachoir_metadata/formatter.py0000644000175000017500000000113411251277274021574 0ustar haypohaypofrom hachoir_core.i18n import _, ngettext NB_CHANNEL_NAME = {1: _("mono"), 2: _("stereo")} def humanAudioChannel(value): return NB_CHANNEL_NAME.get(value, unicode(value)) def humanFrameRate(value): if isinstance(value, (int, long, float)): return _("%.1f fps") 
% value else: return value def humanComprRate(rate): return u"%.1fx" % rate def humanAltitude(value): return ngettext("%.1f meter", "%.1f meters", value) % value def humanPixelSize(value): return ngettext("%s pixel", "%s pixels", value) % value def humanDPI(value): return u"%s DPI" % value hachoir-metadata-1.3.3/hachoir_metadata/jpeg.py0000644000175000017500000002506411325706506020523 0ustar haypohaypofrom hachoir_metadata.metadata import RootMetadata, registerExtractor from hachoir_metadata.image import computeComprRate from hachoir_parser.image.exif import ExifEntry from hachoir_parser.image.jpeg import ( JpegFile, JpegChunk, QUALITY_HASH_COLOR, QUALITY_SUM_COLOR, QUALITY_HASH_GRAY, QUALITY_SUM_GRAY) from hachoir_core.field import MissingField from hachoir_core.i18n import _ from hachoir_core.tools import makeUnicode from hachoir_metadata.safe import fault_tolerant from datetime import datetime def deg2float(degree, minute, second): return degree + (float(minute) + float(second) / 60.0) / 60.0 class JpegMetadata(RootMetadata): EXIF_KEY = { # Exif metadatas ExifEntry.TAG_CAMERA_MANUFACTURER: "camera_manufacturer", ExifEntry.TAG_CAMERA_MODEL: "camera_model", ExifEntry.TAG_ORIENTATION: "image_orientation", ExifEntry.TAG_EXPOSURE: "camera_exposure", ExifEntry.TAG_FOCAL: "camera_focal", ExifEntry.TAG_BRIGHTNESS: "camera_brightness", ExifEntry.TAG_APERTURE: "camera_aperture", # Generic metadatas ExifEntry.TAG_IMG_TITLE: "title", ExifEntry.TAG_SOFTWARE: "producer", ExifEntry.TAG_FILE_TIMESTAMP: "creation_date", ExifEntry.TAG_WIDTH: "width", ExifEntry.TAG_HEIGHT: "height", ExifEntry.TAG_USER_COMMENT: "comment", } IPTC_KEY = { 80: "author", 90: "city", 101: "country", 116: "copyright", 120: "title", 231: "comment", } orientation_name = { 1: _('Horizontal (normal)'), 2: _('Mirrored horizontal'), 3: _('Rotated 180'), 4: _('Mirrored vertical'), 5: _('Mirrored horizontal then rotated 90 counter-clock-wise'), 6: _('Rotated 90 clock-wise'), 7: _('Mirrored horizontal then 
rotated 90 clock-wise'), 8: _('Rotated 90 counter clock-wise'), } def extract(self, jpeg): if "start_frame/content" in jpeg: self.startOfFrame(jpeg["start_frame/content"]) elif "start_scan/content/nr_components" in jpeg: self.bits_per_pixel = 8 * jpeg["start_scan/content/nr_components"].value if "app0/content" in jpeg: self.extractAPP0(jpeg["app0/content"]) if "exif/content" in jpeg: for ifd in jpeg.array("exif/content/ifd"): for entry in ifd.array("entry"): self.processIfdEntry(ifd, entry) self.readGPS(ifd) if "photoshop/content" in jpeg: psd = jpeg["photoshop/content"] if "version/content/reader_name" in psd: self.producer = psd["version/content/reader_name"].value if "iptc/content" in psd: self.parseIPTC(psd["iptc/content"]) for field in jpeg.array("comment"): if "content/comment" in field: self.comment = field["content/comment"].value self.computeQuality(jpeg) if "data" in jpeg: computeComprRate(self, jpeg["data"].size) if not self.has("producer") and "photoshop" in jpeg: self.producer = u"Adobe Photoshop" if self.has("compression"): self.compression = "JPEG" @fault_tolerant def startOfFrame(self, sof): # Set compression method key = sof["../type"].value self.compression = "JPEG (%s)" % JpegChunk.START_OF_FRAME[key] # Read image size and bits/pixel self.width = sof["width"].value self.height = sof["height"].value nb_components = sof["nr_components"].value self.bits_per_pixel = 8 * nb_components if nb_components == 3: self.pixel_format = _("YCbCr") elif nb_components == 1: self.pixel_format = _("Grayscale") self.nb_colors = 256 @fault_tolerant def computeQuality(self, jpeg): # This function is an adaption to Python of ImageMagick code # to compute JPEG quality using quantization tables # Read quantization tables qtlist = [] for dqt in jpeg.array("quantization"): for qt in dqt.array("content/qt"): # TODO: Take care of qt["index"].value? 
qtlist.append(qt) if not qtlist: return # Compute sum of all coefficients sumcoeff = 0 for qt in qtlist: coeff = qt.array("coeff") for index in xrange(64): sumcoeff += coeff[index].value # Choose the right quality table and compute hash value try: hashval= qtlist[0]["coeff[2]"].value + qtlist[0]["coeff[53]"].value if 2 <= len(qtlist): hashval += qtlist[1]["coeff[0]"].value + qtlist[1]["coeff[63]"].value hashtable = QUALITY_HASH_COLOR sumtable = QUALITY_SUM_COLOR else: hashtable = QUALITY_HASH_GRAY sumtable = QUALITY_SUM_GRAY except (MissingField, IndexError): # A coefficient is missing, so don't compute JPEG quality return # Find the JPEG quality for index in xrange(100): if (hashval >= hashtable[index]) or (sumcoeff >= sumtable[index]): quality = "%s%%" % (index + 1) if (hashval > hashtable[index]) or (sumcoeff > sumtable[index]): quality += " " + _("(approximate)") self.comment = "JPEG quality: %s" % quality return @fault_tolerant def extractAPP0(self, app0): self.format_version = u"JFIF %u.%02u" \ % (app0["ver_maj"].value, app0["ver_min"].value) if "y_density" in app0: self.width_dpi = app0["x_density"].value self.height_dpi = app0["y_density"].value @fault_tolerant def processIfdEntry(self, ifd, entry): # Skip unknown tags tag = entry["tag"].value if tag not in self.EXIF_KEY: return key = self.EXIF_KEY[tag] if key in ("width", "height") and self.has(key): # EXIF "valid size" are sometimes not updated when the image is scaled # so we just ignore it return # Read value if "value" in entry: value = entry["value"].value else: value = ifd["value_%s" % entry.name].value # Convert value to string if tag == ExifEntry.TAG_ORIENTATION: value = self.orientation_name.get(value, value) elif tag == ExifEntry.TAG_EXPOSURE: if not value: return if isinstance(value, float): value = (value, u"1/%g" % (1/value)) elif entry["type"].value in (ExifEntry.TYPE_RATIONAL, ExifEntry.TYPE_SIGNED_RATIONAL): value = (value, u"%.3g" % value) # Store information setattr(self, key, value) 
@fault_tolerant
def readGPS(self, ifd):
    """
    Read the GPS entries of an EXIF IFD.

    Sets latitude and longitude (when both the coordinate and its reference
    were found), altitude (negated when the altitude reference is 1) and
    creation_date (date stamp, followed by the time stamp when there is
    one).
    """
    def sign(positive):
        # +1 for the positive direction, -1 for the negative one
        if positive:
            return 1
        return -1

    def scalar(entry):
        # Value stored in the IFD as "value_<entry name>"
        return ifd["value_%s" % entry.name].value

    def triplet(entry):
        # Three values stored as "value_<entry name>[0]" .. "[2]"
        return [ifd["value_%s[%u]" % (entry.name, index)].value
                for index in range(3)]

    lat_sign = lon_sign = None
    lat = lon = None
    alt_sign = 1
    alt = None
    clock = None
    day = None

    # First pass: collect the raw values
    for entry in ifd.array("entry"):
        tag = entry["tag"].value
        if tag == ExifEntry.TAG_GPS_LATITUDE_REF:
            lat_sign = sign(entry["value"].value == "N")
        elif tag == ExifEntry.TAG_GPS_LONGITUDE_REF:
            lon_sign = sign(entry["value"].value == "E")
        elif tag == ExifEntry.TAG_GPS_ALTITUDE_REF:
            negative = (entry["value"].value == 1)
            alt_sign = -sign(negative)
        elif tag == ExifEntry.TAG_GPS_LATITUDE:
            lat = triplet(entry)
        elif tag == ExifEntry.TAG_GPS_LONGITUDE:
            lon = triplet(entry)
        elif tag == ExifEntry.TAG_GPS_ALTITUDE:
            alt = scalar(entry)
        elif tag == ExifEntry.TAG_GPS_DATESTAMP:
            day = scalar(entry)
        elif tag == ExifEntry.TAG_GPS_TIMESTAMP:
            clock = ":".join([str(int(part)) for part in triplet(entry)])

    # Second pass: store what is complete
    if lat_sign and lat:
        degrees = deg2float(*lat)
        if lat_sign < 0:
            degrees = -degrees
        self.latitude = degrees
    if lon and lon_sign:
        degrees = deg2float(*lon)
        if lon_sign < 0:
            degrees = -degrees
        self.longitude = degrees
    if alt:
        if alt_sign < 0:
            alt = -alt
        self.altitude = alt
    if day:
        if clock:
            day += " " + clock
        self.creation_date = day
class TorrentMetadata(RootMetadata):
    """
    Metadata of a torrent file: root keys (announce URL, comment, creation
    date) and keys of the "info" dictionary (length, name, piece length).
    """

    # Root key name => metadata attribute
    KEY_TO_ATTR = {
        u"announce": "url",
        u"comment": "comment",
        u"creation_date": "creation_date",
    }

    # Key name of the "info" dictionary => metadata attribute
    INFO_TO_ATTR = {
        u"length": "file_size",
        u"name": "filename",
    }

    def extract(self, torrent):
        """Process every field of the first field set of the torrent."""
        for entry in torrent[0]:
            self.processRoot(entry)

    @fault_tolerant
    def processRoot(self, field):
        """Store a known root key, or walk through the "info" dictionary."""
        name = field.name
        if name in self.KEY_TO_ATTR:
            setattr(self, self.KEY_TO_ATTR[name], field.value)
            return
        if name == "info" and "value" in field:
            for item in field["value"]:
                self.processInfo(item)

    @fault_tolerant
    def processInfo(self, field):
        """Store a known "info" key; the piece length becomes a comment."""
        name = field.name
        if name in self.INFO_TO_ATTR:
            setattr(self, self.INFO_TO_ATTR[name], field.value)
            return
        if name == "piece_length":
            self.comment = "Piece length: %s" % field.display
@fault_tolerant
def extractNames(self, names):
    """
    Store the strings of the font name table.

    For each name header, the string is looked up at (table offset + header
    offset) * 8 using getFieldByAddress(). Only string fields whose name
    identifier is listed in NAMEID_TO_ATTR are stored; the "Version "
    prefix of a version string is removed.
    """
    prefix = u"Version "
    base = names["offset"].value
    for header in names.array("header"):
        name_id = header["nameID"].value
        position = base + header["offset"].value
        field = names.getFieldByAddress(position*8)
        if not (field and isString(field)):
            continue
        text = field.value
        if name_id not in self.NAMEID_TO_ATTR:
            continue
        attr = self.NAMEID_TO_ATTR[name_id]
        if attr == "version" and text.startswith(prefix):
            # "Version 1.2" => "1.2"
            text = text[len(prefix):]
        setattr(self, attr, text)
"summary[0]") if summary: self.useSummary(summary, False) @fault_tolerant def useRoot(self, root): stream = root.getSubIStream() ministream = guessParser(stream) if not ministream: warning("Unable to create the OLE2 mini stream parser!") return self._extract(ministream, main_document=False) def getField(self, fieldset, main_document, name): if name not in fieldset: return None # _feedAll() is needed to make sure that we get all fragments # eg. summary[0], summary[1], ..., summary[n] fieldset._feedAll() field = fieldset[name] if main_document: stream = field.getSubIStream() field = guessParser(stream) if not field: warning("Unable to create the OLE2 parser for %s!" % name) return None return field @fault_tolerant def useSummary(self, summary, is_doc_summary): if "os" in summary: self.os = summary["os"].display if "section[0]" not in summary: return summary = summary["section[0]"] for property in summary.array("property_index"): self.useProperty(summary, property, is_doc_summary) @fault_tolerant def useWordDocument(self, doc): self.comment = "Encrypted: %s" % doc["fEncrypted"].value @fault_tolerant def useProperty(self, summary, property, is_doc_summary): field = summary.getFieldByAddress(property["offset"].value*8) if not field \ or "value" not in field: return field = field["value"] if not field.hasValue(): return # Get value value = field.value if isinstance(value, (str, unicode)): value = normalizeString(value) if not value: return # Get property identifier prop_id = property["id"].value if is_doc_summary: id_to_attr = self.DOC_SUMMARY_ID_TO_ATTR ignore = self.IGNORE_DOC_SUMMARY else: id_to_attr = self.SUMMARY_ID_TO_ATTR ignore = self.IGNORE_SUMMARY if prop_id in ignore: return # Get Hachoir metadata key try: key = id_to_attr[prop_id] use_prefix = False except LookupError: key = "comment" use_prefix = True if use_prefix: prefix = property["id"].display if (prefix in ("TotalEditingTime", "LastPrinted")) \ and (not field): # Ignore null time delta return value = 
"%s: %s" % (prefix, value) else: if (key == "last_modification") and (not field): # Ignore null timestamp return setattr(self, key, value) class PcfMetadata(RootMetadata): PROP_TO_KEY = { 'CHARSET_REGISTRY': 'charset', 'COPYRIGHT': 'copyright', 'WEIGHT_NAME': 'font_weight', 'FOUNDRY': 'author', 'FONT': 'title', '_XMBDFED_INFO': 'producer', } def extract(self, pcf): if "properties" in pcf: self.useProperties(pcf["properties"]) def useProperties(self, properties): last = properties["total_str_length"] offset0 = last.address + last.size for index in properties.array("property"): # Search name and value value = properties.getFieldByAddress(offset0+index["value_offset"].value*8) if not value: continue value = value.value if not value: continue name = properties.getFieldByAddress(offset0+index["name_offset"].value*8) if not name: continue name = name.value if name not in self.PROP_TO_KEY: warning("Skip %s=%r" % (name, value)) continue key = self.PROP_TO_KEY[name] setattr(self, key, value) class SwfMetadata(RootMetadata): def extract(self, swf): self.height = swf["rect/ymax"].value # twips self.width = swf["rect/xmax"].value # twips self.format_version = "flash version %s" % swf["version"].value self.frame_rate = swf["frame_rate"].value self.comment = "Frame count: %s" % swf["frame_count"].value registerExtractor(TorrentFile, TorrentMetadata) registerExtractor(TrueTypeFontFile, TTF_Metadata) registerExtractor(OLE2_File, OLE2_Metadata) registerExtractor(PcfFile, PcfMetadata) registerExtractor(SwfFile, SwfMetadata) hachoir-metadata-1.3.3/hachoir_metadata/image.py0000644000175000017500000002504411325706765020665 0ustar haypohaypofrom hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) from hachoir_parser.image import ( BmpFile, IcoFile, PcxFile, GifFile, PngFile, TiffFile, XcfFile, TargaFile, WMF_File, PsdFile) from hachoir_parser.image.png import getBitsPerPixel as pngBitsPerPixel from hachoir_parser.image.xcf import XcfProperty 
class TiffMetadata(RootMetadata):
    """
    Metadata of a TIFF picture, read from the entries of its "ifd" field.
    """

    # IFD entry name => metadata attribute.
    # "img_height" used to be mapped to "width": the height was never
    # stored and the width received two values.
    key_to_attr = {
        "img_width": "width",
        "img_height": "height",
        # TODO: Enable that (need link to value)
        # "description": "comment",
        # "doc_name": "title",
        # "orientation": "image_orientation",
    }

    def extract(self, tiff):
        """Read the "ifd" field of the file, if it has one."""
        if "ifd" in tiff:
            self.useIFD(tiff["ifd"])

    def useIFD(self, ifd):
        """
        Store the value of every IFD entry listed in key_to_attr.

        Entries with an unknown name or without "value" field are skipped.
        """
        for field in ifd:
            try:
                attrname = self.key_to_attr[field.name]
            except KeyError:
                continue
            if "value" not in field:
                continue
            value = field["value"].value
            setattr(self, attrname, value)
class PcxMetadata(RootMetadata):
    """Metadata of a PCX picture."""

    @fault_tolerant
    def extract(self, pcx):
        """
        Read size, resolution, color depth and version from the PCX fields,
        then compute the compression rate when "image_data" exists.
        """
        # xmax and ymax are incremented by one to get the size
        self.width = pcx["xmax"].value + 1
        self.height = pcx["ymax"].value + 1
        self.width_dpi = pcx["horiz_dpi"].value
        self.height_dpi = pcx["vert_dpi"].value

        depth = pcx["bpp"].value
        self.bits_per_pixel = depth
        if 1 <= depth <= 8:
            self.nb_colors = 2 ** depth

        self.compression = _("Run-length encoding (RLE)")
        self.format_version = "PCX: %s" % pcx["version"].display

        if "image_data" in pcx:
            pixels = pcx["image_data"]
            computeComprRate(self, pixels.size)
def extract(self, png):
    """
    Collect the metadata of a parsed PNG file.

    Uses the "header", "time" and "physical" chunks when they exist, then
    every text chunk: a keyword listed in TEXT_TO_ATTR is stored in the
    matching attribute, any other text becomes a comment (prefixed by
    "keyword=" unless the keyword is "comment"). The compression rate is
    computed from the total size of the "data" chunks.
    """
    handlers = (
        ("header", self.useHeader),
        ("time", self.useTime),
        ("physical", self.usePhysical),
    )
    for name, handler in handlers:
        if name in png:
            handler(png[name])

    for chunk in png.array("text"):
        if "text" not in chunk:
            continue
        keyword = chunk["keyword"].value
        text = chunk["text"].value
        lowered = keyword.lower()
        try:
            setattr(self, self.TEXT_TO_ATTR[lowered], text)
        except KeyError:
            if lowered == "comment":
                self.comment = text
            else:
                self.comment = "%s=%s" % (keyword, text)

    computeComprRate(self, sum(data.size for data in png.array("data")))
class GifMetadata(RootMetadata):
    """Metadata of a GIF picture."""

    def extract(self, gif):
        """
        Read screen size and depth, number of colors, format version,
        comments and pixel format (with or without transparency).
        """
        self.useScreen(gif["/screen"])
        if self.has("bits_per_pixel"):
            depth = self.get('bits_per_pixel')
            self.nb_colors = 1 << depth
        self.compression = _("LZW")
        self.format_version = "GIF version %s" % gif["version"].value

        # Each "comments" field set holds a list of "comment" fields
        for group in gif.array("comments"):
            path = group.name + "/comment"
            for comment in gif.array(path):
                self.comment = comment.value

        transparent = "graphic_ctl/has_transp" in gif
        if transparent:
            transparent = gif["graphic_ctl/has_transp"].value
        if transparent:
            self.pixel_format = _("Color index with transparency")
        else:
            self.pixel_format = _("Color index")

    @fault_tolerant
    def useScreen(self, screen):
        """Read width, height and bits/pixel (stored "bpp" plus one)."""
        self.width = screen["width"].value
        self.height = screen["height"].value
        self.bits_per_pixel = screen["bpp"].value + 1
class ISO9660_Metadata(RootMetadata):
    """Metadata of an ISO 9660 file system, read from its first volume."""

    def extract(self, iso):
        """
        Copy the text fields of 'volume[0]/content' into the metadata, then
        read the creation and modification timestamps.
        """
        desc = iso['volume[0]/content']
        # (metadata attribute, field name), in storage order
        text_fields = (
            ('title', 'volume_id'),
            ('title', 'vol_set_id'),
            ('author', 'publisher'),
            ('author', 'data_preparer'),
            ('producer', 'application'),
            ('copyright', 'copyright'),
        )
        for attr, name in text_fields:
            setattr(self, attr, desc[name].value)
        for attr, name in (
            ('creation_date', 'creation_ts'),
            ('last_modification', 'modification_ts'),
        ):
            self.readTimestamp(attr, desc[name].value)

    @fault_tolerant
    def readTimestamp(self, key, value):
        """
        Convert a "YYYYMMDDhhmmss..." string to a datetime and store it in
        attribute 'key'. A string starting with "0000" is ignored.
        """
        if value.startswith("0000"):
            return
        # Slices of year, month, day, hour, minute and second
        bounds = ((0, 4), (4, 6), (6, 8), (8, 10), (10, 12), (12, 14))
        parts = [int(value[start:stop]) for start, stop in bounds]
        setattr(self, key, datetime(*parts))
def computeCompressionRate(meta):
    """
    Set meta.compr_rate to file_size / compr_size (as a float).

    Both sizes have to use the same unit (bytes). Nothing is stored when
    one of the two sizes is missing or null.
    """
    if not (meta.has("file_size") and meta.get("compr_size", 0)):
        return
    original_size = meta.get("file_size")
    if original_size:
        meta.compr_rate = float(original_size) / meta.get("compr_size")
class TarMetadata(MultipleMetadata):
    """Metadata of a TAR archive: one group per archived file."""

    def extract(self, tar):
        """
        Add one metadata group per "file" entry, up to the number of files
        allowed by maxNbFile() (None means no limit).
        """
        limit = maxNbFile(self)
        for index, field in enumerate(tar.array("file")):
            if limit is not None and index >= limit:
                self.warning("TAR archive contains many files, but only first %s files are processed" % limit)
                break
            entry = Metadata(self)
            self.extractFile(field, entry)
            if not entry.has("filename"):
                title = _("File")
            else:
                title = _('File "%s"') % entry.getText('filename')
            self.addGroup(field.name, entry, title)

    @fault_tolerant
    def extractFile(self, field, meta):
        """
        Fill 'meta' with name, attributes, size, modification time, type
        and owner of one archived file.
        """
        meta.filename = field["name"].value
        mode = field.getOctal("mode")
        meta.file_attr = humanUnixAttributes(mode)
        meta.file_size = field.getOctal("size")
        try:
            # A null modification time is not stored
            if field.getOctal("mtime"):
                meta.last_modification = field.getDatetime()
        except ValueError:
            pass
        meta.file_type = field["type"].display
        owner = (field["uname"].value, field.getOctal("uid"),
                 field["gname"].value, field.getOctal("gid"))
        meta.author = "%s (uid=%s), group %s (gid=%s)" % owner
class Metadata(Logger):
    """
    Set of metadata values, indexed by key.

    The known keys are the Data objects registered by registerAllItems().
    Writing an attribute (meta.author = "haypo") adds a value to the data
    of that key; values are read back with get(), getText(), getValues(),
    getItem() and getItems().
    """

    # Title used by exportPlaintext(), can be replaced using setHeader()
    header = u"Metadata"

    def __init__(self, parent, quality=QUALITY_NORMAL):
        """
        Create an empty set of metadata.

        parent: parent metadata, or None. When a parent is given, its
        quality is used and the 'quality' argument is ignored.
        quality: extraction quality, limited to the range 0.0 .. 1.0.
        """
        assert isinstance(self.header, unicode)

        # Test "is not None" and not the truth value: a parent which has no
        # value yet is false (see __nonzero__), and its quality was ignored.
        if parent is not None:
            quality = parent.quality
        else:
            # Limit to 0.0 .. 1.0
            quality = min(max(0.0, quality), 1.0)

        object.__init__(self)
        # object.__setattr__() is used because __setattr__() of this class
        # only accepts registered metadata keys
        object.__setattr__(self, "_Metadata__data", {})
        object.__setattr__(self, "quality", quality)
        header = self.__class__.header
        object.__setattr__(self, "_Metadata__header", header)

        registerAllItems(self)

    def _logger(self):
        pass

    def __setattr__(self, key, value):
        """
        Add a new value to data with name 'key'. Skip duplicates.

        Raise KeyError if the key is not a registered metadata key.
        """
        # Invalid key?
        if key not in self.__data:
            raise KeyError(_("%s has no metadata '%s'") % (self.__class__.__name__, key))

        # Data.add() skips duplicates
        self.__data[key].add(value)

    def setHeader(self, text):
        """Replace the title used by exportPlaintext()."""
        object.__setattr__(self, "header", text)

    def getItems(self, key):
        """
        Get the data registered with name 'key'.

        Raise ValueError if the key is unknown.
        """
        try:
            return self.__data[key]
        except LookupError:
            raise ValueError("Metadata has no value '%s'" % key)

    def getItem(self, key, index):
        """
        Get the item number 'index' of the data 'key', or None if the key
        is unknown or if there is no such item.
        """
        try:
            return self.getItems(key)[index]
        except (LookupError, ValueError):
            return None

    def has(self, key):
        """
        Check if the data 'key' has at least one value.

        Raise ValueError if the key is unknown.
        """
        return 1 <= len(self.getItems(key))

    def get(self, key, default=None, index=0):
        """
        Read first value of tag with name 'key'.

        If there is no such value: return 'default', or raise ValueError
        if 'default' is None.

        >>> from datetime import timedelta
        >>> a = RootMetadata()
        >>> a.duration = timedelta(seconds=2300)
        >>> a.get('duration')
        datetime.timedelta(0, 2300)
        >>> a.get('author', u'Anonymous')
        u'Anonymous'
        """
        item = self.getItem(key, index)
        if item is None:
            if default is None:
                raise ValueError("Metadata has no value '%s' (index %s)" % (key, index))
            else:
                return default
        return item.value

    def getValues(self, key):
        """
        Get the list of all values of the data 'key'.

        Raise ValueError if the key is unknown.
        """
        try:
            data = self.__data[key]
        except LookupError:
            raise ValueError("Metadata has no value '%s'" % key)
        return [ item.value for item in data ]

    def getText(self, key, default=None, index=0):
        """
        Read first value, as unicode string, of tag with name 'key'.
        Return 'default' if there is no such value.

        >>> from datetime import timedelta
        >>> a = RootMetadata()
        >>> a.duration = timedelta(seconds=2300)
        >>> a.getText('duration')
        u'38 min 20 sec'
        >>> a.getText('titre', u'Unknown')
        u'Unknown'
        """
        item = self.getItem(key, index)
        if item is not None:
            return item.text
        else:
            return default

    def register(self, data):
        """Register a new data: its key must not be already used."""
        assert data.key not in self.__data
        data.metadata = self
        self.__data[data.key] = data

    def __iter__(self):
        return self.__data.itervalues()

    def __str__(self):
        r"""
        Create a multi-line ASCII string (end of line is "\n") which
        represents all datas. The string is empty if there is nothing
        to export.

        >>> a = RootMetadata()
        >>> a.author = "haypo"
        >>> a.copyright = unicode("© Hachoir", "UTF-8")
        >>> print a
        Metadata:
        - Author: haypo
        - Copyright: \xa9 Hachoir

        @see __unicode__() and exportPlaintext()
        """
        # exportPlaintext() returns None when there is nothing to export
        text = self.exportPlaintext() or []
        return "\n".join( makePrintable(line, "ASCII") for line in text )

    def __unicode__(self):
        r"""
        Create a multi-line Unicode string (end of line is "\n") which
        represents all datas. The string is empty if there is nothing
        to export.

        >>> a = RootMetadata()
        >>> a.copyright = unicode("© Hachoir", "UTF-8")
        >>> print repr(unicode(a))
        u'Metadata:\n- Copyright: \xa9 Hachoir'

        @see __str__() and exportPlaintext()
        """
        # exportPlaintext() returns None when there is nothing to export
        return u"\n".join(self.exportPlaintext() or [])

    def exportPlaintext(self, priority=None, human=True, line_prefix=u"- ", title=None):
        r"""
        Convert metadata to multi-line Unicode string and skip datas
        with priority lower than specified priority.

        Default priority is Metadata.MAX_PRIORITY. If human flag is True, data
        key are translated to better human name (eg. "bit_rate" becomes
        "Bit rate") which may be translated using gettext.

        If priority is too small, metadata are empty and so None is returned.

        >>> print RootMetadata().exportPlaintext()
        None
        >>> meta = RootMetadata()
        >>> meta.copyright = unicode("© Hachoir", "UTF-8")
        >>> print repr(meta.exportPlaintext())
        [u'Metadata:', u'- Copyright: \xa9 Hachoir']

        @see __str__() and __unicode__()
        """
        if priority is not None:
            priority = max(priority, MIN_PRIORITY)
            priority = min(priority, MAX_PRIORITY)
        else:
            priority = MAX_PRIORITY
        if not title:
            title = self.header
        text = ["%s:" % title]
        # Datas are sorted by priority: stop at the first one which is
        # over the limit
        for data in sorted(self):
            if priority < data.priority:
                break
            if not data.values:
                continue
            if human:
                title = data.description
            else:
                title = data.key
            for item in data.values:
                if human:
                    value = item.text
                else:
                    value = makeUnicode(item.value)
                text.append("%s%s: %s" % (line_prefix, title, value))
        # Only the title: there is nothing to export
        if 1 < len(text):
            return text
        else:
            return None

    def __nonzero__(self):
        """True if at least one data has a value."""
        return any(item for item in self.__data.itervalues())
""" if not metadata: self.warning("Skip empty group %s" % key) return False if key.endswith("[]"): key = key[:-2] if key in self.__key_counter: self.__key_counter[key] += 1 else: self.__key_counter[key] = 1 key += "[%u]" % self.__key_counter[key] if header: metadata.setHeader(header) self.__groups.append(key, metadata) return True def exportPlaintext(self, priority=None, human=True, line_prefix=u"- "): common = Metadata.exportPlaintext(self, priority, human, line_prefix) if common: text = common else: text = [] for key, metadata in self.__groups.iteritems(): if not human: title = key else: title = None value = metadata.exportPlaintext(priority, human, line_prefix, title=title) if value: text.extend(value) if len(text): return text else: return None def registerExtractor(parser, extractor): assert parser not in extractors assert issubclass(extractor, RootMetadata) extractors[parser] = extractor def extractMetadata(parser, quality=QUALITY_NORMAL): """ Create a Metadata class from a parser. Returns None if no metadata extractor does exist for the parser class. 
""" try: extractor = extractors[parser.__class__] except KeyError: return None metadata = extractor(quality) try: metadata.extract(parser) except HACHOIR_ERRORS, err: error("Error during metadata extraction: %s" % unicode(err)) if metadata: metadata.mime_type = parser.mime_type metadata.endian = endian_name[parser.endian] return metadata hachoir-metadata-1.3.3/hachoir_metadata/metadata_item.py0000644000175000017500000001144411251277274022374 0ustar haypohaypofrom hachoir_core.tools import makeUnicode, normalizeNewline from hachoir_core.error import HACHOIR_ERRORS from hachoir_metadata import config from hachoir_metadata.setter import normalizeString MIN_PRIORITY = 100 MAX_PRIORITY = 999 QUALITY_FASTEST = 0.0 QUALITY_FAST = 0.25 QUALITY_NORMAL = 0.5 QUALITY_GOOD = 0.75 QUALITY_BEST = 1.0 class DataValue: def __init__(self, value, text): self.value = value self.text = text class Data: def __init__(self, key, priority, description, text_handler=None, type=None, filter=None, conversion=None): """ handler is only used if value is not string nor unicode, prototype: def handler(value) -> str/unicode """ assert MIN_PRIORITY <= priority <= MAX_PRIORITY assert isinstance(description, unicode) self.metadata = None self.key = key self.description = description self.values = [] if type and not isinstance(type, (tuple, list)): type = (type,) self.type = type self.text_handler = text_handler self.filter = filter self.priority = priority self.conversion = conversion def _createItem(self, value, text=None): if text is None: if isinstance(value, unicode): text = value elif self.text_handler: text = self.text_handler(value) assert isinstance(text, unicode) else: text = makeUnicode(value) return DataValue(value, text) def add(self, value): if isinstance(value, tuple): if len(value) != 2: raise ValueError("Data.add() only accept tuple of 2 elements: (value,text)") value, text = value else: text = None # Skip value 'None' if value is None: return if isinstance(value, (str, unicode)): 
value = normalizeString(value) if not value: return # Convert string to Unicode string using charset ISO-8859-1 if self.conversion: try: new_value = self.conversion(self.metadata, self.key, value) except HACHOIR_ERRORS, err: self.metadata.warning("Error during conversion of %r value: %s" % ( self.key, err)) return if new_value is None: dest_types = " or ".join(str(item.__name__) for item in self.type) self.metadata.warning("Unable to convert %s=%r (%s) to %s" % ( self.key, value, type(value).__name__, dest_types)) return if isinstance(new_value, tuple): if text: value = new_value[0] else: value, text = new_value else: value = new_value elif isinstance(value, str): value = unicode(value, "ISO-8859-1") if self.type and not isinstance(value, self.type): dest_types = " or ".join(str(item.__name__) for item in self.type) self.metadata.warning("Key %r: value %r type (%s) is not %s" % ( self.key, value, type(value).__name__, dest_types)) return # Skip empty strings if isinstance(value, unicode): value = normalizeNewline(value) if config.MAX_STR_LENGTH \ and config.MAX_STR_LENGTH < len(value): value = value[:config.MAX_STR_LENGTH] + "(...)" # Skip duplicates if value in self: return # Use filter if self.filter and not self.filter(value): self.metadata.warning("Skip value %s=%r (filter)" % (self.key, value)) return # For string, if you have "verlongtext" and "verylo", # keep the longer value if isinstance(value, unicode): for index, item in enumerate(self.values): item = item.value if not isinstance(item, unicode): continue if value.startswith(item): # Find longer value, replace the old one self.values[index] = self._createItem(value, text) return if item.startswith(value): # Find truncated value, skip it return # Add new value self.values.append(self._createItem(value, text)) def __len__(self): return len(self.values) def __getitem__(self, index): return self.values[index] def __contains__(self, value): for item in self.values: if value == item.value: return True return 
False def __cmp__(self, other): return cmp(self.priority, other.priority) hachoir-metadata-1.3.3/hachoir_metadata/register.py0000644000175000017500000001547711325706747021440 0ustar haypohaypofrom hachoir_core.i18n import _ from hachoir_core.tools import ( humanDuration, humanBitRate, humanFrequency, humanBitSize, humanFilesize, humanDatetime) from hachoir_core.language import Language from hachoir_metadata.filter import Filter, NumberFilter, DATETIME_FILTER from datetime import date, datetime, timedelta from hachoir_metadata.formatter import ( humanAudioChannel, humanFrameRate, humanComprRate, humanAltitude, humanPixelSize, humanDPI) from hachoir_metadata.setter import ( setDatetime, setTrackNumber, setTrackTotal, setLanguage) from hachoir_metadata.metadata_item import Data MIN_SAMPLE_RATE = 1000 # 1 kHz MAX_SAMPLE_RATE = 192000 # 192 kHz MAX_NB_CHANNEL = 8 # 8 channels MAX_WIDTH = 20000 # 20 000 pixels MAX_BIT_RATE = 500 * 1024 * 1024 # 500 Mbit/s MAX_HEIGHT = MAX_WIDTH MAX_DPI_WIDTH = 10000 MAX_DPI_HEIGHT = MAX_DPI_WIDTH MAX_NB_COLOR = 2 ** 24 # 16 million of color MAX_BITS_PER_PIXEL = 256 # 256 bits/pixel MAX_FRAME_RATE = 150 # 150 frame/sec MAX_NB_PAGE = 20000 MAX_COMPR_RATE = 1000.0 MIN_COMPR_RATE = 0.001 MAX_TRACK = 999 DURATION_FILTER = Filter(timedelta, timedelta(milliseconds=1), timedelta(days=365)) def registerAllItems(meta): meta.register(Data("title", 100, _("Title"), type=unicode)) meta.register(Data("artist", 101, _("Artist"), type=unicode)) meta.register(Data("author", 102, _("Author"), type=unicode)) meta.register(Data("music_composer", 103, _("Music composer"), type=unicode)) meta.register(Data("album", 200, _("Album"), type=unicode)) meta.register(Data("duration", 201, _("Duration"), # integer in milliseconde type=timedelta, text_handler=humanDuration, filter=DURATION_FILTER)) meta.register(Data("nb_page", 202, _("Nb page"), filter=NumberFilter(1, MAX_NB_PAGE))) meta.register(Data("music_genre", 203, _("Music genre"), type=unicode)) 
meta.register(Data("language", 204, _("Language"), conversion=setLanguage, type=Language)) meta.register(Data("track_number", 205, _("Track number"), conversion=setTrackNumber, filter=NumberFilter(1, MAX_TRACK), type=(int, long))) meta.register(Data("track_total", 206, _("Track total"), conversion=setTrackTotal, filter=NumberFilter(1, MAX_TRACK), type=(int, long))) meta.register(Data("organization", 210, _("Organization"), type=unicode)) meta.register(Data("version", 220, _("Version"))) meta.register(Data("width", 301, _("Image width"), filter=NumberFilter(1, MAX_WIDTH), type=(int, long), text_handler=humanPixelSize)) meta.register(Data("height", 302, _("Image height"), filter=NumberFilter(1, MAX_HEIGHT), type=(int, long), text_handler=humanPixelSize)) meta.register(Data("nb_channel", 303, _("Channel"), text_handler=humanAudioChannel, filter=NumberFilter(1, MAX_NB_CHANNEL), type=(int, long))) meta.register(Data("sample_rate", 304, _("Sample rate"), text_handler=humanFrequency, filter=NumberFilter(MIN_SAMPLE_RATE, MAX_SAMPLE_RATE), type=(int, long, float))) meta.register(Data("bits_per_sample", 305, _("Bits/sample"), text_handler=humanBitSize, filter=NumberFilter(1, 64), type=(int, long))) meta.register(Data("image_orientation", 306, _("Image orientation"))) meta.register(Data("nb_colors", 307, _("Number of colors"), filter=NumberFilter(1, MAX_NB_COLOR), type=(int, long))) meta.register(Data("bits_per_pixel", 308, _("Bits/pixel"), filter=NumberFilter(1, MAX_BITS_PER_PIXEL), type=(int, long))) meta.register(Data("filename", 309, _("File name"), type=unicode)) meta.register(Data("file_size", 310, _("File size"), text_handler=humanFilesize, type=(int, long))) meta.register(Data("pixel_format", 311, _("Pixel format"))) meta.register(Data("compr_size", 312, _("Compressed file size"), text_handler=humanFilesize, type=(int, long))) meta.register(Data("compr_rate", 313, _("Compression rate"), text_handler=humanComprRate, filter=NumberFilter(MIN_COMPR_RATE, MAX_COMPR_RATE), 
type=(int, long, float))) meta.register(Data("width_dpi", 320, _("Image DPI width"), filter=NumberFilter(1, MAX_DPI_WIDTH), type=(int, long), text_handler=humanDPI)) meta.register(Data("height_dpi", 321, _("Image DPI height"), filter=NumberFilter(1, MAX_DPI_HEIGHT), type=(int, long), text_handler=humanDPI)) meta.register(Data("file_attr", 400, _("File attributes"))) meta.register(Data("file_type", 401, _("File type"))) meta.register(Data("subtitle_author", 402, _("Subtitle author"), type=unicode)) meta.register(Data("creation_date", 500, _("Creation date"), text_handler=humanDatetime, filter=DATETIME_FILTER, type=(datetime, date), conversion=setDatetime)) meta.register(Data("last_modification", 501, _("Last modification"), text_handler=humanDatetime, filter=DATETIME_FILTER, type=(datetime, date), conversion=setDatetime)) meta.register(Data("latitude", 510, _("Latitude"), type=float)) meta.register(Data("longitude", 511, _("Longitude"), type=float)) meta.register(Data("altitude", 511, _("Altitude"), type=float, text_handler=humanAltitude)) meta.register(Data("location", 530, _("Location"), type=unicode)) meta.register(Data("city", 531, _("City"), type=unicode)) meta.register(Data("country", 532, _("Country"), type=unicode)) meta.register(Data("charset", 540, _("Charset"), type=unicode)) meta.register(Data("font_weight", 550, _("Font weight"))) meta.register(Data("camera_aperture", 520, _("Camera aperture"))) meta.register(Data("camera_focal", 521, _("Camera focal"))) meta.register(Data("camera_exposure", 522, _("Camera exposure"))) meta.register(Data("camera_brightness", 530, _("Camera brightness"))) meta.register(Data("camera_model", 531, _("Camera model"), type=unicode)) meta.register(Data("camera_manufacturer", 532, _("Camera manufacturer"), type=unicode)) meta.register(Data("compression", 600, _("Compression"))) meta.register(Data("copyright", 601, _("Copyright"), type=unicode)) meta.register(Data("url", 602, _("URL"), type=unicode)) 
meta.register(Data("frame_rate", 603, _("Frame rate"), text_handler=humanFrameRate, filter=NumberFilter(1, MAX_FRAME_RATE), type=(int, long, float))) meta.register(Data("bit_rate", 604, _("Bit rate"), text_handler=humanBitRate, filter=NumberFilter(1, MAX_BIT_RATE), type=(int, long, float))) meta.register(Data("aspect_ratio", 604, _("Aspect ratio"), type=(int, long, float))) meta.register(Data("os", 900, _("OS"), type=unicode)) meta.register(Data("producer", 901, _("Producer"), type=unicode)) meta.register(Data("comment", 902, _("Comment"), type=unicode)) meta.register(Data("format_version", 950, _("Format version"), type=unicode)) meta.register(Data("mime_type", 951, _("MIME type"), type=unicode)) meta.register(Data("endian", 952, _("Endianness"), type=unicode)) hachoir-metadata-1.3.3/hachoir_metadata/timezone.py0000644000175000017500000000161311251277274021425 0ustar haypohaypofrom datetime import tzinfo, timedelta class TimezoneUTC(tzinfo): """UTC timezone""" ZERO = timedelta(0) def utcoffset(self, dt): return TimezoneUTC.ZERO def tzname(self, dt): return u"UTC" def dst(self, dt): return TimezoneUTC.ZERO def __repr__(self): return "" class Timezone(TimezoneUTC): """Fixed offset in hour from UTC.""" def __init__(self, offset): self._offset = timedelta(minutes=offset*60) self._name = u"%+03u00" % offset def utcoffset(self, dt): return self._offset def tzname(self, dt): return self._name def __repr__(self): return "" % ( self._offset, self._name) UTC = TimezoneUTC() def createTimezone(offset): if offset: return Timezone(offset) else: return UTC hachoir-metadata-1.3.3/hachoir_metadata/qt/0000755000175000017500000000000011423160161017627 5ustar haypohaypohachoir-metadata-1.3.3/hachoir_metadata/qt/dialog.ui0000644000175000017500000000303111251277274021437 0ustar haypohaypo Form 0 0 441 412 hachoir-metadata Open 0 0 true false 0 0 Quit hachoir-metadata-1.3.3/hachoir_metadata/qt/dialog_ui.py0000644000175000017500000000460711423160157022151 0ustar haypohaypo# -*- coding: 
utf-8 -*- # Form implementation generated from reading ui file 'hachoir_metadata/qt/dialog.ui' # # Created: Mon Jul 26 03:10:06 2010 # by: PyQt4 UI code generator 4.7.3 # # WARNING! All changes made in this file will be lost! from PyQt4 import QtCore, QtGui class Ui_Form(object): def setupUi(self, Form): Form.setObjectName("Form") Form.resize(441, 412) self.verticalLayout = QtGui.QVBoxLayout(Form) self.verticalLayout.setObjectName("verticalLayout") self.horizontalLayout_2 = QtGui.QHBoxLayout() self.horizontalLayout_2.setObjectName("horizontalLayout_2") self.open_button = QtGui.QPushButton(Form) self.open_button.setObjectName("open_button") self.horizontalLayout_2.addWidget(self.open_button) self.files_combo = QtGui.QComboBox(Form) sizePolicy = QtGui.QSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed) sizePolicy.setHorizontalStretch(0) sizePolicy.setVerticalStretch(0) sizePolicy.setHeightForWidth(self.files_combo.sizePolicy().hasHeightForWidth()) self.files_combo.setSizePolicy(sizePolicy) self.files_combo.setObjectName("files_combo") self.horizontalLayout_2.addWidget(self.files_combo) self.verticalLayout.addLayout(self.horizontalLayout_2) self.metadata_table = QtGui.QTableWidget(Form) self.metadata_table.setAlternatingRowColors(True) self.metadata_table.setShowGrid(False) self.metadata_table.setRowCount(0) self.metadata_table.setColumnCount(0) self.metadata_table.setObjectName("metadata_table") self.metadata_table.setColumnCount(0) self.metadata_table.setRowCount(0) self.verticalLayout.addWidget(self.metadata_table) self.quit_button = QtGui.QPushButton(Form) self.quit_button.setObjectName("quit_button") self.verticalLayout.addWidget(self.quit_button) self.retranslateUi(Form) QtCore.QMetaObject.connectSlotsByName(Form) def retranslateUi(self, Form): Form.setWindowTitle(QtGui.QApplication.translate("Form", "hachoir-metadata", None, QtGui.QApplication.UnicodeUTF8)) self.open_button.setText(QtGui.QApplication.translate("Form", "Open", None, 
QtGui.QApplication.UnicodeUTF8)) self.quit_button.setText(QtGui.QApplication.translate("Form", "Quit", None, QtGui.QApplication.UnicodeUTF8)) hachoir-metadata-1.3.3/hachoir_metadata/qt/__init__.py0000644000175000017500000000000011251277274021743 0ustar haypohaypohachoir-metadata-1.3.3/hachoir_metadata/program.py0000644000175000017500000000707611251277274021253 0ustar haypohaypofrom hachoir_metadata.metadata import RootMetadata, registerExtractor from hachoir_parser.program import ExeFile from hachoir_metadata.safe import fault_tolerant, getValue class ExeMetadata(RootMetadata): KEY_TO_ATTR = { u"ProductName": "title", u"LegalCopyright": "copyright", u"LegalTrademarks": "copyright", u"LegalTrademarks1": "copyright", u"LegalTrademarks2": "copyright", u"CompanyName": "author", u"BuildDate": "creation_date", u"FileDescription": "title", u"ProductVersion": "version", } SKIP_KEY = set((u"InternalName", u"OriginalFilename", u"FileVersion", u"BuildVersion")) def extract(self, exe): if exe.isPE(): self.extractPE(exe) elif exe.isNE(): self.extractNE(exe) def extractNE(self, exe): if "ne_header" in exe: self.useNE_Header(exe["ne_header"]) if "info" in exe: self.useNEInfo(exe["info"]) @fault_tolerant def useNEInfo(self, info): for node in info.array("node"): if node["name"].value == "StringFileInfo": self.readVersionInfo(node["node[0]"]) def extractPE(self, exe): # Read information from headers if "pe_header" in exe: self.usePE_Header(exe["pe_header"]) if "pe_opt_header" in exe: self.usePE_OptHeader(exe["pe_opt_header"]) # Use PE resource resource = exe.getResource() if resource and "version_info/node[0]" in resource: for node in resource.array("version_info/node[0]/node"): if getValue(node, "name") == "StringFileInfo" \ and "node[0]" in node: self.readVersionInfo(node["node[0]"]) @fault_tolerant def useNE_Header(self, hdr): if hdr["is_dll"].value: self.format_version = u"New-style executable: Dynamic-link library (DLL)" elif hdr["is_win_app"].value: self.format_version = 
u"New-style executable: Windows 3.x application" else: self.format_version = u"New-style executable for Windows 3.x" @fault_tolerant def usePE_Header(self, hdr): self.creation_date = hdr["creation_date"].value self.comment = "CPU: %s" % hdr["cpu"].display if hdr["is_dll"].value: self.format_version = u"Portable Executable: Dynamic-link library (DLL)" else: self.format_version = u"Portable Executable: Windows application" @fault_tolerant def usePE_OptHeader(self, hdr): self.comment = "Subsystem: %s" % hdr["subsystem"].display def readVersionInfo(self, info): values = {} for node in info.array("node"): if "value" not in node or "name" not in node: continue value = node["value"].value.strip(" \0") if not value: continue key = node["name"].value values[key] = value if "ProductName" in values and "FileDescription" in values: # Make sure that FileDescription is set before ProductName # as title value self.title = values["FileDescription"] self.title = values["ProductName"] del values["FileDescription"] del values["ProductName"] for key, value in values.iteritems(): if key in self.KEY_TO_ATTR: setattr(self, self.KEY_TO_ATTR[key], value) elif key not in self.SKIP_KEY: self.comment = "%s=%s" % (key, value) registerExtractor(ExeFile, ExeMetadata) hachoir-metadata-1.3.3/hachoir_metadata/riff.py0000644000175000017500000001677711251277274020542 0ustar haypohaypo""" Extract metadata from RIFF file format: AVI video and WAV sound. 
""" from hachoir_metadata.metadata import Metadata, MultipleMetadata, registerExtractor from hachoir_metadata.safe import fault_tolerant, getValue from hachoir_parser.container.riff import RiffFile from hachoir_parser.video.fourcc import UNCOMPRESSED_AUDIO from hachoir_core.tools import humanFilesize, makeUnicode, timedelta2seconds from hachoir_core.i18n import _ from hachoir_metadata.audio import computeComprRate as computeAudioComprRate from datetime import timedelta class RiffMetadata(MultipleMetadata): TAG_TO_KEY = { "INAM": "title", "IART": "artist", "ICMT": "comment", "ICOP": "copyright", "IENG": "author", # (engineer) "ISFT": "producer", "ICRD": "creation_date", "IDIT": "creation_date", } def extract(self, riff): type = riff["type"].value if type == "WAVE": self.extractWAVE(riff) size = getValue(riff, "audio_data/size") if size: computeAudioComprRate(self, size*8) elif type == "AVI ": if "headers" in riff: self.extractAVI(riff["headers"]) self.extractInfo(riff["headers"]) elif type == "ACON": self.extractAnim(riff) if "info" in riff: self.extractInfo(riff["info"]) def processChunk(self, chunk): if "text" not in chunk: return value = chunk["text"].value tag = chunk["tag"].value if tag not in self.TAG_TO_KEY: self.warning("Skip RIFF metadata %s: %s" % (tag, value)) return key = self.TAG_TO_KEY[tag] setattr(self, key, value) @fault_tolerant def extractWAVE(self, wav): format = wav["format"] # Number of channel, bits/sample, sample rate self.nb_channel = format["nb_channel"].value self.bits_per_sample = format["bit_per_sample"].value self.sample_rate = format["sample_per_sec"].value self.compression = format["codec"].display if "nb_sample/nb_sample" in wav \ and 0 < format["sample_per_sec"].value: self.duration = timedelta(seconds=float(wav["nb_sample/nb_sample"].value) / format["sample_per_sec"].value) if format["codec"].value in UNCOMPRESSED_AUDIO: # Codec with fixed bit rate self.bit_rate = format["nb_channel"].value * format["bit_per_sample"].value * 
format["sample_per_sec"].value if not self.has("duration") \ and "audio_data/size" in wav \ and self.has("bit_rate"): duration = float(wav["audio_data/size"].value)*8 / self.get('bit_rate') self.duration = timedelta(seconds=duration) def extractInfo(self, fieldset): for field in fieldset: if not field.is_field_set: continue if "tag" in field: if field["tag"].value == "LIST": self.extractInfo(field) else: self.processChunk(field) @fault_tolerant def extractAVIVideo(self, header, meta): meta.compression = "%s (fourcc:\"%s\")" \ % (header["fourcc"].display, makeUnicode(header["fourcc"].value)) if header["rate"].value and header["scale"].value: fps = float(header["rate"].value) / header["scale"].value meta.frame_rate = fps if 0 < fps: self.duration = meta.duration = timedelta(seconds=float(header["length"].value) / fps) if "../stream_fmt/width" in header: format = header["../stream_fmt"] meta.width = format["width"].value meta.height = format["height"].value meta.bits_per_pixel = format["depth"].value else: meta.width = header["right"].value - header["left"].value meta.height = header["bottom"].value - header["top"].value @fault_tolerant def extractAVIAudio(self, format, meta): meta.nb_channel = format["channel"].value meta.sample_rate = format["sample_rate"].value meta.bit_rate = format["bit_rate"].value * 8 if format["bits_per_sample"].value: meta.bits_per_sample = format["bits_per_sample"].value if "../stream_hdr" in format: header = format["../stream_hdr"] if header["rate"].value and header["scale"].value: frame_rate = float(header["rate"].value) / header["scale"].value meta.duration = timedelta(seconds=float(header["length"].value) / frame_rate) if header["fourcc"].value != "": meta.compression = "%s (fourcc:\"%s\")" \ % (format["codec"].display, header["fourcc"].value) if not meta.has("compression"): meta.compression = format["codec"].display self.computeAudioComprRate(meta) @fault_tolerant def computeAudioComprRate(self, meta): uncompr = meta.get('bit_rate', 0) 
if not uncompr: return compr = meta.get('nb_channel') * meta.get('sample_rate') * meta.get('bits_per_sample', default=16) if not compr: return meta.compr_rate = float(compr) / uncompr @fault_tolerant def useAviHeader(self, header): microsec = header["microsec_per_frame"].value if microsec: self.frame_rate = 1000000.0 / microsec total_frame = getValue(header, "total_frame") if total_frame and not self.has("duration"): self.duration = timedelta(microseconds=total_frame * microsec) self.width = header["width"].value self.height = header["height"].value def extractAVI(self, headers): audio_index = 1 for stream in headers.array("stream"): if "stream_hdr/stream_type" not in stream: continue stream_type = stream["stream_hdr/stream_type"].value if stream_type == "vids": if "stream_hdr" in stream: meta = Metadata(self) self.extractAVIVideo(stream["stream_hdr"], meta) self.addGroup("video", meta, "Video stream") elif stream_type == "auds": if "stream_fmt" in stream: meta = Metadata(self) self.extractAVIAudio(stream["stream_fmt"], meta) self.addGroup("audio[%u]" % audio_index, meta, "Audio stream") audio_index += 1 if "avi_hdr" in headers: self.useAviHeader(headers["avi_hdr"]) # Compute global bit rate if self.has("duration") and "/movie/size" in headers: self.bit_rate = float(headers["/movie/size"].value) * 8 / timedelta2seconds(self.get('duration')) # Video has index? 
if "/index" in headers: self.comment = _("Has audio/video index (%s)") \ % humanFilesize(headers["/index"].size/8) @fault_tolerant def extractAnim(self, riff): if "anim_rate/rate[0]" in riff: count = 0 total = 0 for rate in riff.array("anim_rate/rate"): count += 1 if 100 < count: break total += rate.value / 60.0 if count and total: self.frame_rate = count / total if not self.has("frame_rate") and "anim_hdr/jiffie_rate" in riff: self.frame_rate = 60.0 / riff["anim_hdr/jiffie_rate"].value registerExtractor(RiffFile, RiffMetadata) hachoir-metadata-1.3.3/hachoir_metadata/__init__.py0000644000175000017500000000072011251277274021330 0ustar haypohaypofrom hachoir_metadata.version import VERSION as __version__ from hachoir_metadata.metadata import extractMetadata # Just import the module, # each module use registerExtractor() method import hachoir_metadata.archive import hachoir_metadata.audio import hachoir_metadata.file_system import hachoir_metadata.image import hachoir_metadata.jpeg import hachoir_metadata.misc import hachoir_metadata.program import hachoir_metadata.riff import hachoir_metadata.video hachoir-metadata-1.3.3/hachoir_metadata/version.py0000644000175000017500000000021411332532750021245 0ustar haypohaypoPACKAGE = "hachoir-metadata" VERSION = "1.3.3" WEBSITE = "http://bitbucket.org/haypo/hachoir/wiki/hachoir-metadata" LICENSE = "GNU GPL v2" hachoir-metadata-1.3.3/hachoir_metadata/filter.py0000644000175000017500000000320011325706572021052 0ustar haypohaypofrom hachoir_metadata.timezone import UTC from datetime import date, datetime # Year in 1850..2030 MIN_YEAR = 1850 MAX_YEAR = 2030 class Filter: def __init__(self, valid_types, min=None, max=None): self.types = valid_types self.min = min self.max = max def __call__(self, value): if not isinstance(value, self.types): return True if self.min is not None and value < self.min: return False if self.max is not None and self.max < value: return False return True class NumberFilter(Filter): def __init__(self, 
min=None, max=None): Filter.__init__(self, (int, long, float), min, max) class DatetimeFilter(Filter): def __init__(self, min=None, max=None): Filter.__init__(self, (date, datetime), datetime(MIN_YEAR, 1, 1), datetime(MAX_YEAR, 12, 31)) self.min_date = date(MIN_YEAR, 1, 1) self.max_date = date(MAX_YEAR, 12, 31) self.min_tz = datetime(MIN_YEAR, 1, 1, tzinfo=UTC) self.max_tz = datetime(MAX_YEAR, 12, 31, tzinfo=UTC) def __call__(self, value): """ Use different min/max values depending on value type (datetime with timezone, datetime or date). """ if not isinstance(value, self.types): return True if hasattr(value, "tzinfo") and value.tzinfo: return (self.min_tz <= value <= self.max_tz) elif isinstance(value, datetime): return (self.min <= value <= self.max) else: return (self.min_date <= value <= self.max_date) DATETIME_FILTER = DatetimeFilter() hachoir-metadata-1.3.3/hachoir_metadata/video.py0000644000175000017500000003625011422402176020675 0ustar haypohaypofrom hachoir_core.field import MissingField from hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) from hachoir_metadata.metadata_item import QUALITY_GOOD from hachoir_metadata.safe import fault_tolerant from hachoir_parser.video import MovFile, AsfFile, FlvFile from hachoir_parser.video.asf import Descriptor as ASF_Descriptor from hachoir_parser.container import MkvFile from hachoir_parser.container.mkv import dateToDatetime from hachoir_core.i18n import _ from hachoir_core.tools import makeUnicode, makePrintable, timedelta2seconds from datetime import timedelta class MkvMetadata(MultipleMetadata): tag_key = { "TITLE": "title", "URL": "url", "COPYRIGHT": "copyright", # TODO: use maybe another name? 
# Its value may be different than (...)/Info/DateUTC/date "DATE_RECORDED": "creation_date", # TODO: Extract subtitle metadata "SUBTITLE": "subtitle_author", } def extract(self, mkv): for segment in mkv.array("Segment"): self.processSegment(segment) def processSegment(self, segment): for field in segment: if field.name.startswith("Info["): self.processInfo(field) elif field.name.startswith("Tags["): for tag in field.array("Tag"): self.processTag(tag) elif field.name.startswith("Tracks["): self.processTracks(field) elif field.name.startswith("Cluster["): if self.quality < QUALITY_GOOD: return def processTracks(self, tracks): for entry in tracks.array("TrackEntry"): self.processTrack(entry) def processTrack(self, track): if "TrackType/enum" not in track: return if track["TrackType/enum"].display == "video": self.processVideo(track) elif track["TrackType/enum"].display == "audio": self.processAudio(track) elif track["TrackType/enum"].display == "subtitle": self.processSubtitle(track) def trackCommon(self, track, meta): if "Name/unicode" in track: meta.title = track["Name/unicode"].value if "Language/string" in track \ and track["Language/string"].value not in ("mis", "und"): meta.language = track["Language/string"].value def processVideo(self, track): video = Metadata(self) self.trackCommon(track, video) try: video.compression = track["CodecID/string"].value if "Video" in track: video.width = track["Video/PixelWidth/unsigned"].value video.height = track["Video/PixelHeight/unsigned"].value except MissingField: pass self.addGroup("video[]", video, "Video stream") def getDouble(self, field, parent): float_key = '%s/float' % parent if float_key in field: return field[float_key].value double_key = '%s/double' % parent if double_key in field: return field[double_key].value return None def processAudio(self, track): audio = Metadata(self) self.trackCommon(track, audio) if "Audio" in track: frequency = self.getDouble(track, "Audio/SamplingFrequency") if frequency is not None: 
audio.sample_rate = frequency if "Audio/Channels/unsigned" in track: audio.nb_channel = track["Audio/Channels/unsigned"].value if "Audio/BitDepth/unsigned" in track: audio.bits_per_sample = track["Audio/BitDepth/unsigned"].value if "CodecID/string" in track: audio.compression = track["CodecID/string"].value self.addGroup("audio[]", audio, "Audio stream") def processSubtitle(self, track): sub = Metadata(self) self.trackCommon(track, sub) try: sub.compression = track["CodecID/string"].value except MissingField: pass self.addGroup("subtitle[]", sub, "Subtitle") def processTag(self, tag): for field in tag.array("SimpleTag"): self.processSimpleTag(field) def processSimpleTag(self, tag): if "TagName/unicode" not in tag \ or "TagString/unicode" not in tag: return name = tag["TagName/unicode"].value if name not in self.tag_key: return key = self.tag_key[name] value = tag["TagString/unicode"].value setattr(self, key, value) def processInfo(self, info): if "TimecodeScale/unsigned" in info: duration = self.getDouble(info, "Duration") if duration is not None: try: seconds = duration * info["TimecodeScale/unsigned"].value * 1e-9 self.duration = timedelta(seconds=seconds) except OverflowError: # Catch OverflowError for timedelta (long int too large # to be converted to an int) pass if "DateUTC/date" in info: try: self.creation_date = dateToDatetime(info["DateUTC/date"].value) except OverflowError: pass if "WritingApp/unicode" in info: self.producer = info["WritingApp/unicode"].value if "MuxingApp/unicode" in info: self.producer = info["MuxingApp/unicode"].value if "Title/unicode" in info: self.title = info["Title/unicode"].value class FlvMetadata(MultipleMetadata): def extract(self, flv): if "video[0]" in flv: meta = Metadata(self) self.extractVideo(flv["video[0]"], meta) self.addGroup("video", meta, "Video stream") if "audio[0]" in flv: meta = Metadata(self) self.extractAudio(flv["audio[0]"], meta) self.addGroup("audio", meta, "Audio stream") # TODO: Computer duration # One 
technic: use last video/audio chunk and use timestamp # But this is very slow self.format_version = flv.description if "metadata/entry[1]" in flv: self.extractAMF(flv["metadata/entry[1]"]) if self.has('duration'): self.bit_rate = flv.size / timedelta2seconds(self.get('duration')) @fault_tolerant def extractAudio(self, audio, meta): if audio["codec"].display == "MP3" and "music_data" in audio: meta.compression = audio["music_data"].description else: meta.compression = audio["codec"].display meta.sample_rate = audio.getSampleRate() if audio["is_16bit"].value: meta.bits_per_sample = 16 else: meta.bits_per_sample = 8 if audio["is_stereo"].value: meta.nb_channel = 2 else: meta.nb_channel = 1 @fault_tolerant def extractVideo(self, video, meta): meta.compression = video["codec"].display def extractAMF(self, amf): for entry in amf.array("item"): self.useAmfEntry(entry) @fault_tolerant def useAmfEntry(self, entry): key = entry["key"].value if key == "duration": self.duration = timedelta(seconds=entry["value"].value) elif key == "creator": self.producer = entry["value"].value elif key == "audiosamplerate": self.sample_rate = entry["value"].value elif key == "framerate": self.frame_rate = entry["value"].value elif key == "metadatacreator": self.producer = entry["value"].value elif key == "metadatadate": self.creation_date = entry.value elif key == "width": self.width = int(entry["value"].value) elif key == "height": self.height = int(entry["value"].value) class MovMetadata(RootMetadata): def extract(self, mov): for atom in mov: if "movie" in atom: self.processMovie(atom["movie"]) @fault_tolerant def processMovieHeader(self, hdr): self.creation_date = hdr["creation_date"].value self.last_modification = hdr["lastmod_date"].value self.duration = timedelta(seconds=float(hdr["duration"].value) / hdr["time_scale"].value) self.comment = _("Play speed: %.1f%%") % (hdr["play_speed"].value*100) self.comment = _("User volume: %.1f%%") % (float(hdr["volume"].value)*100//255) 
@fault_tolerant def processTrackHeader(self, hdr): width = int(hdr["frame_size_width"].value) height = int(hdr["frame_size_height"].value) if width and height: self.width = width self.height = height def processTrack(self, atom): for field in atom: if "track_hdr" in field: self.processTrackHeader(field["track_hdr"]) def processMovie(self, atom): for field in atom: if "track" in field: self.processTrack(field["track"]) if "movie_hdr" in field: self.processMovieHeader(field["movie_hdr"]) class AsfMetadata(MultipleMetadata): EXT_DESC_TO_ATTR = { "Encoder": "producer", "ToolName": "producer", "AlbumTitle": "album", "Track": "track_number", "TrackNumber": "track_total", "Year": "creation_date", "AlbumArtist": "author", } SKIP_EXT_DESC = set(( # Useless informations "WMFSDKNeeded", "WMFSDKVersion", "Buffer Average", "VBR Peak", "EncodingTime", "MediaPrimaryClassID", "UniqueFileIdentifier", )) def extract(self, asf): if "header/content" in asf: self.processHeader(asf["header/content"]) def processHeader(self, header): compression = [] is_vbr = None if "ext_desc/content" in header: # Extract all data from ext_desc data = {} for desc in header.array("ext_desc/content/descriptor"): self.useExtDescItem(desc, data) # Have ToolName and ToolVersion? 
If yes, group them to producer key if "ToolName" in data and "ToolVersion" in data: self.producer = "%s (version %s)" % (data["ToolName"], data["ToolVersion"]) del data["ToolName"] del data["ToolVersion"] # "IsVBR" key if "IsVBR" in data: is_vbr = (data["IsVBR"] == 1) del data["IsVBR"] # Store data for key, value in data.iteritems(): if key in self.EXT_DESC_TO_ATTR: key = self.EXT_DESC_TO_ATTR[key] else: if isinstance(key, str): key = makePrintable(key, "ISO-8859-1", to_unicode=True) value = "%s=%s" % (key, value) key = "comment" setattr(self, key, value) if "file_prop/content" in header: self.useFileProp(header["file_prop/content"], is_vbr) if "codec_list/content" in header: for codec in header.array("codec_list/content/codec"): if "name" in codec: text = codec["name"].value if "desc" in codec and codec["desc"].value: text = "%s (%s)" % (text, codec["desc"].value) compression.append(text) audio_index = 1 video_index = 1 for index, stream_prop in enumerate(header.array("stream_prop")): if "content/audio_header" in stream_prop: meta = Metadata(self) self.streamProperty(header, index, meta) self.streamAudioHeader(stream_prop["content/audio_header"], meta) if self.addGroup("audio[%u]" % audio_index, meta, "Audio stream #%u" % audio_index): audio_index += 1 elif "content/video_header" in stream_prop: meta = Metadata(self) self.streamProperty(header, index, meta) self.streamVideoHeader(stream_prop["content/video_header"], meta) if self.addGroup("video[%u]" % video_index, meta, "Video stream #%u" % video_index): video_index += 1 if "metadata/content" in header: info = header["metadata/content"] try: self.title = info["title"].value self.author = info["author"].value self.copyright = info["copyright"].value except MissingField: pass @fault_tolerant def streamAudioHeader(self, audio, meta): if not meta.has("compression"): meta.compression = audio["twocc"].display meta.nb_channel = audio["channels"].value meta.sample_rate = audio["sample_rate"].value meta.bits_per_sample = 
audio["bits_per_sample"].value @fault_tolerant def streamVideoHeader(self, video, meta): meta.width = video["width"].value meta.height = video["height"].value if "bmp_info" in video: bmp_info = video["bmp_info"] if not meta.has("compression"): meta.compression = bmp_info["codec"].display meta.bits_per_pixel = bmp_info["bpp"].value @fault_tolerant def useExtDescItem(self, desc, data): if desc["type"].value == ASF_Descriptor.TYPE_BYTE_ARRAY: # Skip binary data return key = desc["name"].value if "/" in key: # Replace "WM/ToolName" with "ToolName" key = key.split("/", 1)[1] if key in self.SKIP_EXT_DESC: # Skip some keys return value = desc["value"].value if not value: return value = makeUnicode(value) data[key] = value @fault_tolerant def useFileProp(self, prop, is_vbr): self.creation_date = prop["creation_date"].value self.duration = prop["play_duration"].value if prop["seekable"].value: self.comment = u"Is seekable" value = prop["max_bitrate"].value text = prop["max_bitrate"].display if is_vbr is True: text = "VBR (%s max)" % text elif is_vbr is False: text = "%s (CBR)" % text else: text = "%s (max)" % text self.bit_rate = (value, text) def streamProperty(self, header, index, meta): key = "bit_rates/content/bit_rate[%u]/avg_bitrate" % index if key in header: meta.bit_rate = header[key].value # TODO: Use codec list # It doesn't work when the video uses /header/content/bitrate_mutex # since the codec list are shared between streams but... how is it # shared? 
# key = "codec_list/content/codec[%u]" % index # if key in header: # codec = header[key] # if "name" in codec: # text = codec["name"].value # if "desc" in codec and codec["desc"].value: # meta.compression = "%s (%s)" % (text, codec["desc"].value) # else: # meta.compression = text registerExtractor(MovFile, MovMetadata) registerExtractor(AsfFile, AsfMetadata) registerExtractor(FlvFile, FlvMetadata) registerExtractor(MkvFile, MkvMetadata) hachoir-metadata-1.3.3/hachoir_metadata/setter.py0000644000175000017500000001212311251277274021077 0ustar haypohaypofrom datetime import date, datetime import re from hachoir_core.language import Language from locale import setlocale, LC_ALL from time import strptime from hachoir_metadata.timezone import createTimezone from hachoir_metadata import config NORMALIZE_REGEX = re.compile("[-/.: ]+") YEAR_REGEX1 = re.compile("^([0-9]{4})$") # Date regex: YYYY-MM-DD (US format) DATE_REGEX1 = re.compile("^([0-9]{4})~([01][0-9])~([0-9]{2})$") # Date regex: YYYY-MM-DD HH:MM:SS (US format) DATETIME_REGEX1 = re.compile("^([0-9]{4})~([01][0-9])~([0-9]{2})~([0-9]{1,2})~([0-9]{2})~([0-9]{2})$") # Datetime regex: "MM-DD-YYYY HH:MM:SS" (FR format) DATETIME_REGEX2 = re.compile("^([01]?[0-9])~([0-9]{2})~([0-9]{4})~([0-9]{1,2})~([0-9]{2})~([0-9]{2})$") # Timezone regex: "(...) 
+0200" TIMEZONE_REGEX = re.compile("^(.*)~([+-][0-9]{2})00$") # Timestmap: 'February 2007' MONTH_YEAR = "%B~%Y" # Timestmap: 'Sun Feb 24 15:51:09 2008' RIFF_TIMESTAMP = "%a~%b~%d~%H~%M~%S~%Y" # Timestmap: 'Thu, 19 Jul 2007 09:03:57' ISO_TIMESTAMP = "%a,~%d~%b~%Y~%H~%M~%S" def parseDatetime(value): """ Year and date: >>> parseDatetime("2000") (datetime.date(2000, 1, 1), u'2000') >>> parseDatetime("2004-01-02") datetime.date(2004, 1, 2) Timestamp: >>> parseDatetime("2004-01-02 18:10:45") datetime.datetime(2004, 1, 2, 18, 10, 45) >>> parseDatetime("2004-01-02 18:10:45") datetime.datetime(2004, 1, 2, 18, 10, 45) Timestamp with timezone: >>> parseDatetime(u'Thu, 19 Jul 2007 09:03:57 +0000') datetime.datetime(2007, 7, 19, 9, 3, 57, tzinfo=) >>> parseDatetime(u'Thu, 19 Jul 2007 09:03:57 +0200') datetime.datetime(2007, 7, 19, 9, 3, 57, tzinfo=) """ value = NORMALIZE_REGEX.sub("~", value.strip()) regs = YEAR_REGEX1.match(value) if regs: try: year = int(regs.group(1)) return (date(year, 1, 1), unicode(year)) except ValueError: pass regs = DATE_REGEX1.match(value) if regs: try: year = int(regs.group(1)) month = int(regs.group(2)) day = int(regs.group(3)) return date(year, month, day) except ValueError: pass regs = DATETIME_REGEX1.match(value) if regs: try: year = int(regs.group(1)) month = int(regs.group(2)) day = int(regs.group(3)) hour = int(regs.group(4)) min = int(regs.group(5)) sec = int(regs.group(6)) return datetime(year, month, day, hour, min, sec) except ValueError: pass regs = DATETIME_REGEX2.match(value) if regs: try: month = int(regs.group(1)) day = int(regs.group(2)) year = int(regs.group(3)) hour = int(regs.group(4)) min = int(regs.group(5)) sec = int(regs.group(6)) return datetime(year, month, day, hour, min, sec) except ValueError: pass current_locale = setlocale(LC_ALL, "C") try: match = TIMEZONE_REGEX.match(value) if match: without_timezone = match.group(1) delta = int(match.group(2)) delta = createTimezone(delta) else: without_timezone = value delta = None 
try: timestamp = strptime(without_timezone, ISO_TIMESTAMP) arguments = list(timestamp[0:6]) + [0, delta] return datetime(*arguments) except ValueError: pass try: timestamp = strptime(without_timezone, RIFF_TIMESTAMP) arguments = list(timestamp[0:6]) + [0, delta] return datetime(*arguments) except ValueError: pass try: timestamp = strptime(value, MONTH_YEAR) arguments = list(timestamp[0:3]) return date(*arguments) except ValueError: pass finally: setlocale(LC_ALL, current_locale) return None def setDatetime(meta, key, value): if isinstance(value, (str, unicode)): return parseDatetime(value) elif isinstance(value, (date, datetime)): return value return None def setLanguage(meta, key, value): """ >>> setLanguage(None, None, "fre") >>> setLanguage(None, None, u"ger") """ return Language(value) def setTrackTotal(meta, key, total): """ >>> setTrackTotal(None, None, "10") 10 """ try: return int(total) except ValueError: meta.warning("Invalid track total: %r" % total) return None def setTrackNumber(meta, key, number): if isinstance(number, (int, long)): return number if "/" in number: number, total = number.split("/", 1) meta.track_total = total try: return int(number) except ValueError: meta.warning("Invalid track number: %r" % number) return None def normalizeString(text): if config.RAW_OUTPUT: return text return text.strip(" \t\v\n\r\0") hachoir-metadata-1.3.3/hachoir_metadata/safe.py0000644000175000017500000000144711325706602020510 0ustar haypohaypofrom hachoir_core.error import HACHOIR_ERRORS, warning def fault_tolerant(func, *args): def safe_func(*args, **kw): try: func(*args, **kw) except HACHOIR_ERRORS, err: warning("Error when calling function %s(): %s" % ( func.__name__, err)) return safe_func def getFieldAttribute(fieldset, key, attrname): try: field = fieldset[key] if field.hasValue(): return getattr(field, attrname) except HACHOIR_ERRORS, err: warning("Unable to get %s of field %s/%s: %s" % ( attrname, fieldset.path, key, err)) return None def 
getValue(fieldset, key): return getFieldAttribute(fieldset, key, "value") def getDisplay(fieldset, key): return getFieldAttribute(fieldset, key, "display") hachoir-metadata-1.3.3/hachoir_metadata/audio.py0000644000175000017500000003615311422377430020676 0ustar haypohaypofrom hachoir_metadata.metadata import (registerExtractor, Metadata, RootMetadata, MultipleMetadata) from hachoir_parser.audio import AuFile, MpegAudioFile, RealAudioFile, AiffFile, FlacParser from hachoir_parser.container import OggFile, RealMediaFile from hachoir_core.i18n import _ from hachoir_core.tools import makePrintable, timedelta2seconds, humanBitRate from datetime import timedelta from hachoir_metadata.metadata_item import QUALITY_FAST, QUALITY_NORMAL, QUALITY_BEST from hachoir_metadata.safe import fault_tolerant, getValue def computeComprRate(meta, size): if not meta.has("duration") \ or not meta.has("sample_rate") \ or not meta.has("bits_per_sample") \ or not meta.has("nb_channel") \ or not size: return orig_size = timedelta2seconds(meta.get("duration")) * meta.get('sample_rate') * meta.get('bits_per_sample') * meta.get('nb_channel') meta.compr_rate = float(orig_size) / size def computeBitRate(meta): if not meta.has("bits_per_sample") \ or not meta.has("nb_channel") \ or not meta.has("sample_rate"): return meta.bit_rate = meta.get('bits_per_sample') * meta.get('nb_channel') * meta.get('sample_rate') VORBIS_KEY_TO_ATTR = { "ARTIST": "artist", "ALBUM": "album", "TRACKNUMBER": "track_number", "TRACKTOTAL": "track_total", "ENCODER": "producer", "TITLE": "title", "LOCATION": "location", "DATE": "creation_date", "ORGANIZATION": "organization", "GENRE": "music_genre", "": "comment", "COMPOSER": "music_composer", "DESCRIPTION": "comment", "COMMENT": "comment", "WWW": "url", "WOAF": "url", "LICENSE": "copyright", } @fault_tolerant def readVorbisComment(metadata, comment): metadata.producer = getValue(comment, "vendor") for item in comment.array("metadata"): if "=" in item.value: key, value = 
item.value.split("=", 1) key = key.upper() if key in VORBIS_KEY_TO_ATTR: key = VORBIS_KEY_TO_ATTR[key] setattr(metadata, key, value) elif value: metadata.warning("Skip Vorbis comment %s: %s" % (key, value)) class OggMetadata(MultipleMetadata): def extract(self, ogg): granule_quotient = None for index, page in enumerate(ogg.array("page")): if "segments" not in page: continue page = page["segments"] if "vorbis_hdr" in page: meta = Metadata(self) self.vorbisHeader(page["vorbis_hdr"], meta) self.addGroup("audio[]", meta, "Audio") if not granule_quotient and meta.has("sample_rate"): granule_quotient = meta.get('sample_rate') if "theora_hdr" in page: meta = Metadata(self) self.theoraHeader(page["theora_hdr"], meta) self.addGroup("video[]", meta, "Video") if "video_hdr" in page: meta = Metadata(self) self.videoHeader(page["video_hdr"], meta) self.addGroup("video[]", meta, "Video") if not granule_quotient and meta.has("frame_rate"): granule_quotient = meta.get('frame_rate') if "comment" in page: readVorbisComment(self, page["comment"]) if 3 <= index: # Only process pages 0..3 break # Compute duration if granule_quotient and QUALITY_NORMAL <= self.quality: page = ogg.createLastPage() if page and "abs_granule_pos" in page: try: self.duration = timedelta(seconds=float(page["abs_granule_pos"].value) / granule_quotient) except OverflowError: pass def videoHeader(self, header, meta): meta.compression = header["fourcc"].display meta.width = header["width"].value meta.height = header["height"].value meta.bits_per_pixel = header["bits_per_sample"].value if header["time_unit"].value: meta.frame_rate = 10000000.0 / header["time_unit"].value def theoraHeader(self, header, meta): meta.compression = "Theora" meta.format_version = "Theora version %u.%u (revision %u)" % (\ header["version_major"].value, header["version_minor"].value, header["version_revision"].value) meta.width = header["frame_width"].value meta.height = header["frame_height"].value if header["fps_den"].value: 
meta.frame_rate = float(header["fps_num"].value) / header["fps_den"].value if header["aspect_ratio_den"].value: meta.aspect_ratio = float(header["aspect_ratio_num"].value) / header["aspect_ratio_den"].value meta.pixel_format = header["pixel_format"].display meta.comment = "Quality: %s" % header["quality"].value def vorbisHeader(self, header, meta): meta.compression = u"Vorbis" meta.sample_rate = header["audio_sample_rate"].value meta.nb_channel = header["audio_channels"].value meta.format_version = u"Vorbis version %s" % header["vorbis_version"].value meta.bit_rate = header["bitrate_nominal"].value class AuMetadata(RootMetadata): def extract(self, audio): self.sample_rate = audio["sample_rate"].value self.nb_channel = audio["channels"].value self.compression = audio["codec"].display if "info" in audio: self.comment = audio["info"].value self.bits_per_sample = audio.getBitsPerSample() computeBitRate(self) if "audio_data" in audio: if self.has("bit_rate"): self.duration = timedelta(seconds=float(audio["audio_data"].size) / self.get('bit_rate')) computeComprRate(self, audio["audio_data"].size) class RealAudioMetadata(RootMetadata): FOURCC_TO_BITRATE = { u"28_8": 15200, # 28.8 kbit/sec (audio bit rate: 15.2 kbit/s) u"14_4": 8000, # 14.4 kbit/sec u"lpcJ": 8000, # 14.4 kbit/sec } def extract(self, real): version = real["version"].value if "metadata" in real: self.useMetadata(real["metadata"]) self.useRoot(real) self.format_version = "Real audio version %s" % version if version == 3: size = getValue(real, "data_size") elif "filesize" in real and "headersize" in real: size = (real["filesize"].value + 40) - (real["headersize"].value + 16) else: size = None if size: size *= 8 if self.has("bit_rate"): sec = float(size) / self.get('bit_rate') self.duration = timedelta(seconds=sec) computeComprRate(self, size) @fault_tolerant def useMetadata(self, info): self.title = info["title"].value self.author = info["author"].value self.copyright = info["copyright"].value self.comment = 
info["comment"].value @fault_tolerant def useRoot(self, real): self.bits_per_sample = 16 # FIXME: Is that correct? if real["version"].value != 3: self.sample_rate = real["sample_rate"].value self.nb_channel = real["channels"].value else: self.sample_rate = 8000 self.nb_channel = 1 fourcc = getValue(real, "FourCC") if fourcc: self.compression = fourcc try: self.bit_rate = self.FOURCC_TO_BITRATE[fourcc] except LookupError: pass class RealMediaMetadata(MultipleMetadata): KEY_TO_ATTR = { "generated by": "producer", "creation date": "creation_date", "modification date": "last_modification", "description": "comment", } def extract(self, media): if "file_prop" in media: self.useFileProp(media["file_prop"]) if "content_desc" in media: self.useContentDesc(media["content_desc"]) for index, stream in enumerate(media.array("stream_prop")): self.useStreamProp(stream, index) @fault_tolerant def useFileInfoProp(self, prop): key = prop["name"].value.lower() value = prop["value"].value if key in self.KEY_TO_ATTR: setattr(self, self.KEY_TO_ATTR[key], value) elif value: self.warning("Skip %s: %s" % (prop["name"].value, value)) @fault_tolerant def useFileProp(self, prop): self.bit_rate = prop["avg_bit_rate"].value self.duration = timedelta(milliseconds=prop["duration"].value) @fault_tolerant def useContentDesc(self, content): self.title = content["title"].value self.author = content["author"].value self.copyright = content["copyright"].value self.comment = content["comment"].value @fault_tolerant def useStreamProp(self, stream, index): meta = Metadata(self) meta.comment = "Start: %s" % stream["stream_start"].value if getValue(stream, "mime_type") == "logical-fileinfo": for prop in stream.array("file_info/prop"): self.useFileInfoProp(prop) else: meta.bit_rate = stream["avg_bit_rate"].value meta.duration = timedelta(milliseconds=stream["duration"].value) meta.mime_type = getValue(stream, "mime_type") meta.title = getValue(stream, "desc") self.addGroup("stream[%u]" % index, meta, "Stream 
#%u" % (1+index)) class MpegAudioMetadata(RootMetadata): TAG_TO_KEY = { # ID3 version 2.2 "TP1": "author", "COM": "comment", "TEN": "producer", "TRK": "track_number", "TAL": "album", "TT2": "title", "TYE": "creation_date", "TCO": "music_genre", # ID3 version 2.3+ "TPE1": "author", "COMM": "comment", "TENC": "producer", "TRCK": "track_number", "TALB": "album", "TIT2": "title", "TYER": "creation_date", "WXXX": "url", "TCON": "music_genre", "TLAN": "language", "TCOP": "copyright", "TDAT": "creation_date", "TRDA": "creation_date", "TORY": "creation_date", "TIT1": "title", } def processID3v2(self, field): # Read value if "content" not in field: return content = field["content"] if "text" not in content: return if "title" in content and content["title"].value: value = "%s: %s" % (content["title"].value, content["text"].value) else: value = content["text"].value # Known tag? tag = field["tag"].value if tag not in self.TAG_TO_KEY: if tag: if isinstance(tag, str): tag = makePrintable(tag, "ISO-8859-1", to_unicode=True) self.warning("Skip ID3v2 tag %s: %s" % (tag, value)) return key = self.TAG_TO_KEY[tag] setattr(self, key, value) def readID3v2(self, id3): for field in id3: if field.is_field_set and "tag" in field: self.processID3v2(field) def extract(self, mp3): if "/frames/frame[0]" in mp3: frame = mp3["/frames/frame[0]"] self.nb_channel = (frame.getNbChannel(), frame["channel_mode"].display) self.format_version = u"MPEG version %s layer %s" % \ (frame["version"].display, frame["layer"].display) self.sample_rate = frame.getSampleRate() self.bits_per_sample = 16 if mp3["frames"].looksConstantBitRate(): self.computeBitrate(frame) else: self.computeVariableBitrate(mp3) if "id3v1" in mp3: id3 = mp3["id3v1"] self.comment = id3["comment"].value self.author = id3["author"].value self.title = id3["song"].value self.album = id3["album"].value if id3["year"].value != "0": self.creation_date = id3["year"].value if "track_nb" in id3: self.track_number = id3["track_nb"].value if 
"id3v2" in mp3: self.readID3v2(mp3["id3v2"]) if "frames" in mp3: computeComprRate(self, mp3["frames"].size) def computeBitrate(self, frame): bit_rate = frame.getBitRate() # may returns None on error if not bit_rate: return self.bit_rate = (bit_rate, _("%s (constant)") % humanBitRate(bit_rate)) self.duration = timedelta(seconds=float(frame["/frames"].size) / bit_rate) def computeVariableBitrate(self, mp3): if self.quality <= QUALITY_FAST: return count = 0 if QUALITY_BEST <= self.quality: self.warning("Process all MPEG audio frames to compute exact duration") max_count = None else: max_count = 500 * self.quality total_bit_rate = 0.0 for index, frame in enumerate(mp3.array("frames/frame")): if index < 3: continue bit_rate = frame.getBitRate() if bit_rate: total_bit_rate += float(bit_rate) count += 1 if max_count and max_count <= count: break if not count: return bit_rate = total_bit_rate / count self.bit_rate = (bit_rate, _("%s (Variable bit rate)") % humanBitRate(bit_rate)) duration = timedelta(seconds=float(mp3["frames"].size) / bit_rate) self.duration = duration class AiffMetadata(RootMetadata): def extract(self, aiff): if "common" in aiff: self.useCommon(aiff["common"]) computeBitRate(self) @fault_tolerant def useCommon(self, info): self.nb_channel = info["nb_channel"].value self.bits_per_sample = info["sample_size"].value self.sample_rate = getValue(info, "sample_rate") if self.has("sample_rate"): rate = self.get("sample_rate") if rate: sec = float(info["nb_sample"].value) / rate self.duration = timedelta(seconds=sec) if "codec" in info: self.compression = info["codec"].display class FlacMetadata(RootMetadata): def extract(self, flac): if "metadata/stream_info/content" in flac: self.useStreamInfo(flac["metadata/stream_info/content"]) if "metadata/comment/content" in flac: readVorbisComment(self, flac["metadata/comment/content"]) @fault_tolerant def useStreamInfo(self, info): self.nb_channel = info["nb_channel"].value + 1 self.bits_per_sample = 
info["bits_per_sample"].value + 1 self.sample_rate = info["sample_hertz"].value sec = info["total_samples"].value if sec: sec = float(sec) / info["sample_hertz"].value self.duration = timedelta(seconds=sec) registerExtractor(AuFile, AuMetadata) registerExtractor(MpegAudioFile, MpegAudioMetadata) registerExtractor(OggFile, OggMetadata) registerExtractor(RealMediaFile, RealMediaMetadata) registerExtractor(RealAudioFile, RealAudioMetadata) registerExtractor(AiffFile, AiffMetadata) registerExtractor(FlacParser, FlacMetadata) hachoir-metadata-1.3.3/test_doc.py0000755000175000017500000000131111251277274016120 0ustar haypohaypo#!/usr/bin/env python2.4 import doctest import sys def importModule(name): mod = __import__(name) components = name.split('.') for comp in components[1:]: mod = getattr(mod, comp) return mod def testModule(name): print "--- Test module %s" % name module = importModule(name) failure, nb_test = doctest.testmod(module) if failure: sys.exit(1) print "--- End of test" def main(): # Configure Hachoir for tests import hachoir_core.config as config config.use_i18n = False # Test documentation of some functions/classes testModule("hachoir_metadata.metadata") testModule("hachoir_metadata.setter") if __name__ == "__main__": main() hachoir-metadata-1.3.3/metadata_csv.py0000755000175000017500000000563011325706657016763 0ustar haypohaypo#!/usr/bin/env python from hachoir_core.error import HachoirError from hachoir_core.cmd_line import unicodeFilename from hachoir_parser import createParser from hachoir_core.tools import makePrintable from hachoir_metadata import extractMetadata from hachoir_core.i18n import initLocale from sys import argv, stderr, exit from os import walk from os.path import join as path_join from fnmatch import fnmatch import codecs OUTPUT_FILENAME = "metadata.csv" class Extractor: def __init__(self, directory, fields): self.directory = directory self.fields = fields self.charset = "UTF-8" self.total = 0 self.invalid = 0 def main(self): output = 
codecs.open(OUTPUT_FILENAME, "w", self.charset) for filename in self.findFiles(self.directory, '*.doc'): self.total += 1 line = self.processFile(filename) if line: print >>output, line else: self.invalid += 1 output.close() self.summary() def summary(self): print >>stderr print >>stderr, "Valid files: %s" % (self.total - self.invalid) print >>stderr, "Invalid files: %s" % self.invalid print >>stderr, "Total files: %s" % self.total print >>stderr print >>stderr, "Result written into %s" % OUTPUT_FILENAME def findFiles(self, directory, pattern): for dirpath, dirnames, filenames in walk(directory): for filename in filenames: if not fnmatch(filename.lower(), pattern): continue yield path_join(dirpath, filename) def processFile(self, filename): filename, realname = unicodeFilename(filename), filename print u"[%s] Process file %s..." % (self.total, filename) parser = createParser(filename, realname) if not parser: print >>stderr, "Unable to parse file" return None try: metadata = extractMetadata(parser) except HachoirError, err: print >>stderr, "Metadata extraction error: %s" % unicode(err) return None if not metadata: print >>stderr, "Unable to extract metadata" return None filename = makePrintable(filename, self.charset, to_unicode=True) line = [filename] for field in self.fields: value = metadata.getText(field, u'') value = makePrintable(value, self.charset, to_unicode=True) line.append(value) return '; '.join(line) def main(): initLocale() if len(argv) != 3: print >>stderr, "usage: %s directory fields" % argv[0] print >>stderr print >>stderr, "eg. %s . 
title,creation_date" % argv[0] exit(1) directory = argv[1] fields = [field.strip() for field in argv[2].split(",")] Extractor(directory, fields).main() if __name__ == "__main__": main() hachoir-metadata-1.3.3/setup.py0000755000175000017500000000756411332531511015457 0ustar haypohaypo#!/usr/bin/python # Script to install hachoir-metadata module and programs # # Options: # --setuptools: use setuptools instead of distutils # --disable-qt: don't install hachoir-metadata-qt # #--------------- # # Procedure to release a new version: # - edit hachoir_metadata/version.py: VERSION = "XXX" # - edit setup.py: install_options["install_requires"] = ["hachoir-core>=1.3", "hachoir-parser>=1.3"] # - edit INSTALL: Dependencies section # - edit ChangeLog (set release date) # - run: ./test_doc.py # - run: ./run_testcase.py ~/testcase # - run: hg commit # - run: hg tag hachoir-metadata-XXX # - run: hg push # - run: python2.5 ./setup.py --setuptools register sdist bdist_egg upload # - run: python2.4 ./setup.py --setuptools bdist_egg upload # - run: python2.6 ./setup.py --setuptools bdist_egg upload # - check: http://pypi.python.org/pypi/hachoir-metadata # - update the website # * http://bitbucket.org/haypo/hachoir/wiki/Install/source # * http://bitbucket.org/haypo/hachoir/wiki/Home # - edit hachoir_metadata/version.py: set version to N+1 in # - edit ChangeLog: add a new "hachoir-metadata N+1" section with text XXX from imp import load_source from os import path import sys CLASSIFIERS = [ 'Intended Audience :: Developers', 'Development Status :: 5 - Production/Stable', 'Environment :: Console :: Curses', 'Topic :: Multimedia', 'License :: OSI Approved :: GNU General Public License (GPL)', 'Operating System :: OS Independent', 'Natural Language :: English', 'Programming Language :: Python'] def main(): if "--setuptools" in sys.argv: sys.argv.remove("--setuptools") from setuptools import setup use_setuptools = True else: from distutils.core import setup use_setuptools = False SCRIPTS = 
["hachoir-metadata", "hachoir-metadata-gtk"] PACKAGES = ["hachoir_metadata"] if "--disable-qt" not in sys.argv: from subprocess import call SCRIPTS.append("hachoir-metadata-qt") dialog = "hachoir_metadata/qt/dialog" dialog_python = dialog + "_ui.py" command = ["pyuic4", "-o", dialog_python, dialog + ".ui"] try: exitcode = call(command) except OSError, err: exitcode = 1 if exitcode: if path.exists(dialog_python): print >>sys.stderr, "Warning: unable to recompile dialog.ui to dialog_ui.py using pyuic4" print >>sys.stderr, '(use command "%s --disable-qt" to disable this warning)' % ' '.join(sys.argv) print >>sys.stderr else: print >>sys.stderr, "ERROR: Unable to compile dialog.ui to dialog_ui.py using pyuic4" print >>sys.stderr, 'Use command "%s --disable-qt" to skip hachoir-metadata-qt' % ' '.join(sys.argv) print >>sys.stderr, 'pyuic4 is included in the PyQt4 development package' sys.exit(1) PACKAGES.append("hachoir_metadata.qt") else: sys.argv.remove("--disable-qt") hachoir_metadata = load_source("version", path.join("hachoir_metadata", "version.py")) long_description = open('README').read() + open('ChangeLog').read() install_options = { "name": hachoir_metadata.PACKAGE, "version": hachoir_metadata.VERSION, "url": hachoir_metadata.WEBSITE, "download_url": hachoir_metadata.WEBSITE, "author": "Victor Stinner", "description": "Program to extract metadata using Hachoir library", "long_description": long_description, "classifiers": CLASSIFIERS, "license": hachoir_metadata.LICENSE, "scripts": SCRIPTS, "packages": PACKAGES, } if use_setuptools: install_options["install_requires"] = ["hachoir-core>=1.3", "hachoir-parser>=1.3"] install_options["zip_safe"] = True setup(**install_options) if __name__ == "__main__": main() hachoir-metadata-1.3.3/INSTALL0000644000175000017500000000055011330145670014764 0ustar haypohaypoDependencies ============ hachoir-metadata: * hachoir-core 1.3 * hachoir-parser 1.3 hachoir-metadata-gtk: * pygtk (Debian package: python-gtk2) 
hachoir-metadata-qt: * PyQt4 To compile hachoir_metadata/qt/dialog.ui, you need pyuic4 which is part of PyQt4 development tools. Install ======= Run the following command as root: :: ./setup.py install hachoir-metadata-1.3.3/setup.cfg0000644000175000017500000000007311423160161015547 0ustar haypohaypo[egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 hachoir-metadata-1.3.3/README0000644000175000017500000000551511251277507014630 0ustar haypohaypohachoir-metadata extracts metadata from multimedia files: music, picture, video, but also archives. It supports most common file formats: * Archives: bzip2, gzip, zip, tar * Audio: MPEG audio ("MP3"), WAV, Sun/NeXT audio, Ogg/Vorbis (OGG), MIDI, AIFF, AIFC, Real audio (RA) * Image: BMP, CUR, EMF, ICO, GIF, JPEG, PCX, PNG, TGA, TIFF, WMF, XCF * Misc: Torrent * Program: EXE * Video: ASF format (WMV video), AVI, Matroska (MKV), Quicktime (MOV), Ogg/Theora, Real media (RM) It tries to give as much information as possible. For some file formats, it gives more information than libextractor for example, such as the RIFF parser, which can extract creation date, software used to generate the file, etc. But hachoir-metadata cannot guess information. The most complex operation is just to compute the duration of a music file using frame size and file size. hachoir-metadata has three modes: * classic mode: extract metadata, you can use --level=LEVEL to limit quantity of information to display (and not to extract) * --type: show on one line the file format and the most important information * --mime: just display file MIME type The command 'hachoir-metadata --mime' works like 'file --mime', and 'hachoir-metadata --type' like 'file'. But today the file command supports more file formats than hachoir-metadata.
Website: http://bitbucket.org/haypo/hachoir/wiki/hachoir-metadata Example ======= Example on an AVI video (RIFF file format):: $ hachoir-metadata pacte_des_gnous.avi Common: - Duration: 4 min 25 sec - Comment: Has audio/video index (248.9 KB) - MIME type: video/x-msvideo - Endian: Little endian Video stream: - Image width: 600 - Image height: 480 - Bits/pixel: 24 - Compression: DivX v4 (fourcc:"divx") - Frame rate: 30.0 Audio stream: - Channel: stereo - Sample rate: 22.1 KHz - Compression: MPEG Layer 3 Modes --mime and --type ======================= Option --mime asks to display only the file MIME type (works like UNIX "file --mime" program):: $ hachoir-metadata --mime logo-Kubuntu.png sheep_on_drugs.mp3 wormux_32x32_16c.ico logo-Kubuntu.png: image/png sheep_on_drugs.mp3: audio/mpeg wormux_32x32_16c.ico: image/x-ico Option --type displays a short description of the file type (works like UNIX "file" program):: $ hachoir-metadata --type logo-Kubuntu.png sheep_on_drugs.mp3 wormux_32x32_16c.ico logo-Kubuntu.png: PNG picture: 331x90x8 (alpha layer) sheep_on_drugs.mp3: MPEG v1 layer III, 128.0 Kbit/sec, 44.1 KHz, Joint stereo wormux_32x32_16c.ico: Microsoft Windows icon: 16x16x32 Similar projects ================ * Kaa - http://freevo.sourceforge.net/cgi-bin/freevo-2.0/Kaa (written in Python) * libextractor: http://gnunet.org/libextractor/ (written in C) A *lot* of other libraries have been written to read and/or write metadata in MP3 music and/or EXIF photos.
hachoir-metadata-1.3.3/gnome/0000755000175000017500000000000011423160161015033 5ustar haypohaypohachoir-metadata-1.3.3/gnome/hachoir0000755000175000017500000000125211251277274016413 0ustar haypohaypo#!/bin/bash set -x TMP_FILE=$(mktemp /tmp/hachoir-metadataXXXXXX) hachoir-metadata "$1" > $TMP_FILE NBR_LIGNES=$(wc -l $TMP_FILE | cut -d " " -f 1) X=$(($NBR_LIGNES/3+2)) if [ $X -gt 10 ] then HIGHT_WINDOWS=100 else if [[ $X -gt 3 && $X -lt 11 ]] then HIGHT_WINDOWS=$((X*10)) else HIGHT_WINDOWS=20 fi fi NBR_MAX_CHAR=$(wc -L $TMP_FILE | cut -d " " -f 1) if [ ${NBR_MAX_CHAR} -lt 10 ] then WIDTH_WINDOWS=20 else if [ ${NBR_MAX_CHAR} -lt 100 ] then WIDTH_WINDOWS=160 else WIDTH_WINDOWS=$(( ( ${NBR_MAX_CHAR} * 15 ) / 13 )) fi fi gdialog --title "$1" --textbox $TMP_FILE $HIGHT_WINDOWS $WIDTH_WINDOWS rm $TMP_FILE hachoir-metadata-1.3.3/gnome/README0000644000175000017500000000027511251277274015734 0ustar haypohaypoNautilus hachoir-metadata plugin. Installation ============ To install it, just copy "hachoir" file into directory: ~/.gnome2/nautilus-scripts/ Dependencies ============ * gdialog hachoir-metadata-1.3.3/ChangeLog0000644000175000017500000000572611423160076015517 0ustar haypohaypohachoir-metadata 1.3.3 (2010-07-26) =================================== * Support WebM video (update Matroska extractor) * Matroska parser extracts audio bits per sample hachoir-metadata 1.3.2 (2010-02-04) =================================== * Include hachoir_metadata/qt/dialog_ui.py in MANIFEST.in * setup.py ignores pyuic4 error if dialog_ui.py is present * setup.py installs hachoir_metadata.qt module hachoir-metadata 1.3.1 (2010-01-28) =================================== * setup.py compiles dialog.ui to dialog_ui.py and install hachoir-metadata-qt. Create --disable-qt option to skip hachoir-metadata-qt installation. * Create a MANIFEST.in file to include extra files like ChangeLog, AUTHORS, gnome and kde subdirectories, test_doc.py, etc. 
hachoir-metadata 1.3 (2010-01-20) ================================= * Create hachoir-metadata-qt: a graphical interface (Qt toolkit) to display files metadata * Create ISO9660 extractor * Hide Hachoir warnings by default (use --verbose to show them) * hachoir-metadata program: create --force-parser option to choose the parser hachoir-metadata 1.2.1 (2008-10-16) =================================== * Using --raw, strings are not normalized (don't strip trailing space, new line, nul byte, etc.) * Extract much more information from Microsoft Office documents (.doc, .xls, .pps, etc.) * Improve OLE2 (Word) extractor * Fix ASF extractor for hachoir-parser 1.2.1 hachoir-metadata 1.2 (2008-09-03) ================================= * Create --maxlen option for hachoir-metadata program: --maxlen=0 disables the arbitrary string length limit * Create FLAC metadata extractor * Create hachoir_metadata.config, especially MAX_STR_LENGTH option (maximum string length) * GIF image may contain multiple comments hachoir-metadata 1.1 (2008-04-01) ================================= * More extractors are more stable and fault tolerant * Create basic Gtk+ GUI: hachoir-metadata-gtk * Catch error on data conversion * Read width and height DPI for most image formats * JPEG (EXIF): read GPS information * Each data item can have its own "setter" * Add more ID3 keys (TCOP, TDAT, TRDA, TORY, TIT1) * Create datetime filter supporting timezone * Add "meters", "pixels", "DPI" suffix for human display * Create SWF extractor * RIFF: read also information from headers field, compute audio compression rate * MOV: read width and height * ASF: read album artist hachoir-metadata 1.0.1 (???)
============================ * Only use hachoir_core.profiler with --profiler command line option so 'profiler' Python module is now optional * Set shebang to "#!/usr/bin/python" hachoir-metadata 1.0 (2007-07-11) ================================= * Real audio: read number of channels, bit rate, sample rate and compute compression rate * JPEG: Read user comment * Windows ANI: Read frame rate * Use Language from hachoir_core to store language from ID3 and MKV * OLE2 and FLV: Extractors are now fault tolerant hachoir-metadata-1.3.3/AUTHORS0000644000175000017500000000063211251277274015014 0ustar haypohaypoElie Roudninski aka adema - Started Gtk GUI Feth Arezki - Fix hachoir-metadata-qt to save the current directory Jean-Marc Libs - KDE plugin Pierre THIERRY - KDE plugin Thomas PABST - Gnome plugin Victor Stinner aka haypo - Metadata core hachoir-metadata-1.3.3/COPYING0000644000175000017500000004313311251277274015002 0ustar haypohaypo GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price.
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. 
This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. 
b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. 
hachoir-metadata-1.3.3/hachoir_metadata.egg-info/0000755000175000017500000000000011423160161020675 5ustar haypohaypohachoir-metadata-1.3.3/hachoir_metadata.egg-info/requires.txt0000644000175000017500000000004511423160157023301 0ustar haypohaypohachoir-core>=1.3 hachoir-parser>=1.3hachoir-metadata-1.3.3/hachoir_metadata.egg-info/SOURCES.txt0000644000175000017500000000215211423160157022566 0ustar haypohaypoAUTHORS COPYING ChangeLog INSTALL MANIFEST.in README hachoir-metadata hachoir-metadata-gtk hachoir-metadata-qt metadata_csv.py run_testcase.py setup.py test_doc.py gnome/README gnome/hachoir hachoir_metadata/__init__.py hachoir_metadata/archive.py hachoir_metadata/audio.py hachoir_metadata/config.py hachoir_metadata/file_system.py hachoir_metadata/filter.py hachoir_metadata/formatter.py hachoir_metadata/image.py hachoir_metadata/jpeg.py hachoir_metadata/metadata.py hachoir_metadata/metadata_item.py hachoir_metadata/misc.py hachoir_metadata/program.py hachoir_metadata/register.py hachoir_metadata/riff.py hachoir_metadata/safe.py hachoir_metadata/setter.py hachoir_metadata/timezone.py hachoir_metadata/version.py hachoir_metadata/video.py hachoir_metadata.egg-info/PKG-INFO hachoir_metadata.egg-info/SOURCES.txt hachoir_metadata.egg-info/dependency_links.txt hachoir_metadata.egg-info/requires.txt hachoir_metadata.egg-info/top_level.txt hachoir_metadata.egg-info/zip-safe hachoir_metadata/qt/__init__.py hachoir_metadata/qt/dialog.ui hachoir_metadata/qt/dialog_ui.py kde/README kde/hachoir-metadata-kde kde/hachoir.desktophachoir-metadata-1.3.3/hachoir_metadata.egg-info/zip-safe0000644000175000017500000000000111423160157022332 0ustar haypohaypo hachoir-metadata-1.3.3/hachoir_metadata.egg-info/PKG-INFO0000644000175000017500000001750111423160157022003 0ustar haypohaypoMetadata-Version: 1.0 Name: hachoir-metadata Version: 1.3.3 Summary: Program to extract metadata using Hachoir library Home-page: http://bitbucket.org/haypo/hachoir/wiki/hachoir-metadata Author: Victor Stinner 
Author-email: UNKNOWN License: GNU GPL v2 Download-URL: http://bitbucket.org/haypo/hachoir/wiki/hachoir-metadata Description: hachoir-metadata extracts metadata from multimedia files: music, picture, video, but also archives. It supports most common file formats: * Archives: bzip2, gzip, zip, tar * Audio: MPEG audio ("MP3"), WAV, Sun/NeXT audio, Ogg/Vorbis (OGG), MIDI, AIFF, AIFC, Real audio (RA) * Image: BMP, CUR, EMF, ICO, GIF, JPEG, PCX, PNG, TGA, TIFF, WMF, XCF * Misc: Torrent * Program: EXE * Video: ASF format (WMV video), AVI, Matroska (MKV), Quicktime (MOV), Ogg/Theora, Real media (RM) It tries to give as much information as possible. For some file formats, it gives more information than libextractor for example, such as the RIFF parser, which can extract creation date, software used to generate the file, etc. But hachoir-metadata cannot guess informations. The most complex operation is just to compute duration of a music using frame size and file size. hachoir-metadata has three modes: * classic mode: extract metadata, you can use --level=LEVEL to limit quantity of information to display (and not to extract) * --type: show on one line the file format and most important informations * --mime: just display file MIME type The command 'hachoir-metadata --mime' works like 'file --mime', and 'hachoir-metadata --type' like 'file'. But today file command supports more file formats then hachoir-metadata. 
Website: http://bitbucket.org/haypo/hachoir/wiki/hachoir-metadata Example ======= Example on AVI video (RIFF file format):: $ hachoir-metadata pacte_des_gnous.avi Common: - Duration: 4 min 25 sec - Comment: Has audio/video index (248.9 KB) - MIME type: video/x-msvideo - Endian: Little endian Video stream: - Image width: 600 - Image height: 480 - Bits/pixel: 24 - Compression: DivX v4 (fourcc:"divx") - Frame rate: 30.0 Audio stream: - Channel: stereo - Sample rate: 22.1 KHz - Compression: MPEG Layer 3 Modes --mime and --type ======================= Option --mime asks to just display file MIME type (works like UNIX "file --mime" program):: $ hachoir-metadata --mime logo-Kubuntu.png sheep_on_drugs.mp3 wormux_32x32_16c.ico logo-Kubuntu.png: image/png sheep_on_drugs.mp3: audio/mpeg wormux_32x32_16c.ico: image/x-ico Option --type displays a short description of file type (works like UNIX "file" program):: $ hachoir-metadata --type logo-Kubuntu.png sheep_on_drugs.mp3 wormux_32x32_16c.ico logo-Kubuntu.png: PNG picture: 331x90x8 (alpha layer) sheep_on_drugs.mp3: MPEG v1 layer III, 128.0 Kbit/sec, 44.1 KHz, Joint stereo wormux_32x32_16c.ico: Microsoft Windows icon: 16x16x32 Similar projects ================ * Kaa - http://freevo.sourceforge.net/cgi-bin/freevo-2.0/Kaa (written in Python) * libextractor: http://gnunet.org/libextractor/ (written in C) A *lot* of other libraries are written to read and/or write metadata in MP3 music and/or EXIF photo.
hachoir-metadata 1.3.3 (2010-07-26) =================================== * Support WebM video (update Matroska extractor) * Matroska parser extracts audio bits per sample hachoir-metadata 1.3.2 (2010-02-04) =================================== * Include hachoir_metadata/qt/dialog_ui.py in MANIFEST.in * setup.py ignores pyuic4 error if dialog_ui.py is present * setup.py installs hachoir_metadata.qt module hachoir-metadata 1.3.1 (2010-01-28) =================================== * setup.py compiles dialog.ui to dialog_ui.py and install hachoir-metadata-qt. Create --disable-qt option to skip hachoir-metadata-qt installation. * Create a MANIFEST.in file to include extra files like ChangeLog, AUTHORS, gnome and kde subdirectories, test_doc.py, etc. hachoir-metadata 1.3 (2010-01-20) ================================= * Create hachoir-metadata-qt: a graphical interface (Qt toolkit) to display files metadata * Create ISO9660 extractor * Hide Hachoir warnings by default (use --verbose to show them) * hachoir-metadata program: create --force-parser option to choose the parser hachoir-metadata 1.2.1 (2008-10-16) =================================== * Using --raw, strings are not normalized (don't strip trailing space, new line, nul byte, etc.) * Extract much more informations from Microsoft Office documents (.doc, .xsl, .pps, etc.) 
* Improve OLE2 (Word) extractor * Fix ASF extractor for hachoir-parser 1.2.1 hachoir-metadata 1.2 (2008-09-03) ================================= * Create --maxlen option for hachoir-metadata program: --maxlen=0 disables the arbitrary string length limit * Create FLAC metadata extractor * Create hachoir_metadata.config, especially MAX_STR_LENGTH option (maximum string length) * GIF image may contain multiple comments hachoir-metadata 1.1 (2008-04-01) ================================= * More extractors are more stable and fault tolerant * Create basic Gtk+ GUI: hachoir-metadata-gtk * Catch error on data conversion * Read width and height DPI for most image formats * JPEG (EXIF): read GPS information * Each data item can have its own "setter" * Add more ID3 keys (TCOP, TDAT, TRDA, TORY, TIT1) * Create datetime filter supporting timezone * Add "meters", "pixels", "DPI" suffix for human display * Create SWF extractor * RIFF: read also information from headers field, compute audio compression rate * MOV: read width and height * ASF: read album artist hachoir-metadata 1.0.1 (???)
============================ * Only use hachoir_core.profiler with --profiler command line option so 'profiler' Python module is now optional * Set shebang to "#!/usr/bin/python" hachoir-metadata 1.0 (2007-07-11) ================================= * Real audio: read number of channels, bit rate, sample rate and compute compression rate * JPEG: Read user comment * Windows ANI: Read frame rate * Use Language from hachoir_core to store language from ID3 and MKV * OLE2 and FLV: Extractors are now fault tolerant Platform: UNKNOWN Classifier: Intended Audience :: Developers Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console :: Curses Classifier: Topic :: Multimedia Classifier: License :: OSI Approved :: GNU General Public License (GPL) Classifier: Operating System :: OS Independent Classifier: Natural Language :: English Classifier: Programming Language :: Python hachoir-metadata-1.3.3/hachoir_metadata.egg-info/dependency_links.txt0000644000175000017500000000000111423160157024750 0ustar haypohaypo hachoir-metadata-1.3.3/hachoir_metadata.egg-info/top_level.txt0000644000175000017500000000002111423160157023425 0ustar haypohaypohachoir_metadata hachoir-metadata-1.3.3/PKG-INFO0000644000175000017500000001750111423160161015027 0ustar haypohaypoMetadata-Version: 1.0 Name: hachoir-metadata Version: 1.3.3 Summary: Program to extract metadata using Hachoir library Home-page: http://bitbucket.org/haypo/hachoir/wiki/hachoir-metadata Author: Victor Stinner Author-email: UNKNOWN License: GNU GPL v2 Download-URL: http://bitbucket.org/haypo/hachoir/wiki/hachoir-metadata Description: hachoir-metadata extracts metadata from multimedia files: music, picture, video, but also archives.
It supports most common file formats: * Archives: bzip2, gzip, zip, tar * Audio: MPEG audio ("MP3"), WAV, Sun/NeXT audio, Ogg/Vorbis (OGG), MIDI, AIFF, AIFC, Real audio (RA) * Image: BMP, CUR, EMF, ICO, GIF, JPEG, PCX, PNG, TGA, TIFF, WMF, XCF * Misc: Torrent * Program: EXE * Video: ASF format (WMV video), AVI, Matroska (MKV), Quicktime (MOV), Ogg/Theora, Real media (RM) It tries to give as much information as possible. For some file formats, it gives more information than libextractor for example, such as the RIFF parser, which can extract creation date, software used to generate the file, etc. But hachoir-metadata cannot guess informations. The most complex operation is just to compute duration of a music using frame size and file size. hachoir-metadata has three modes: * classic mode: extract metadata, you can use --level=LEVEL to limit quantity of information to display (and not to extract) * --type: show on one line the file format and most important informations * --mime: just display file MIME type The command 'hachoir-metadata --mime' works like 'file --mime', and 'hachoir-metadata --type' like 'file'. But today file command supports more file formats then hachoir-metadata. 
Website: http://bitbucket.org/haypo/hachoir/wiki/hachoir-metadata Example ======= Example on AVI video (RIFF file format):: $ hachoir-metadata pacte_des_gnous.avi Common: - Duration: 4 min 25 sec - Comment: Has audio/video index (248.9 KB) - MIME type: video/x-msvideo - Endian: Little endian Video stream: - Image width: 600 - Image height: 480 - Bits/pixel: 24 - Compression: DivX v4 (fourcc:"divx") - Frame rate: 30.0 Audio stream: - Channel: stereo - Sample rate: 22.1 KHz - Compression: MPEG Layer 3 Modes --mime and --type ======================= Option --mime asks to just display the file MIME type (works like UNIX "file --mime" program):: $ hachoir-metadata --mime logo-Kubuntu.png sheep_on_drugs.mp3 wormux_32x32_16c.ico logo-Kubuntu.png: image/png sheep_on_drugs.mp3: audio/mpeg wormux_32x32_16c.ico: image/x-ico Option --type displays a short description of the file type (works like UNIX "file" program):: $ hachoir-metadata --type logo-Kubuntu.png sheep_on_drugs.mp3 wormux_32x32_16c.ico logo-Kubuntu.png: PNG picture: 331x90x8 (alpha layer) sheep_on_drugs.mp3: MPEG v1 layer III, 128.0 Kbit/sec, 44.1 KHz, Joint stereo wormux_32x32_16c.ico: Microsoft Windows icon: 16x16x32 Similar projects ================ * Kaa - http://freevo.sourceforge.net/cgi-bin/freevo-2.0/Kaa (written in Python) * libextractor: http://gnunet.org/libextractor/ (written in C) A *lot* of other libraries are written to read and/or write metadata in MP3 music and/or EXIF photo. 
hachoir-metadata 1.3.3 (2010-07-26) =================================== * Support WebM video (update Matroska extractor) * Matroska parser extracts audio bits per sample hachoir-metadata 1.3.2 (2010-02-04) =================================== * Include hachoir_metadata/qt/dialog_ui.py in MANIFEST.in * setup.py ignores pyuic4 error if dialog_ui.py is present * setup.py installs hachoir_metadata.qt module hachoir-metadata 1.3.1 (2010-01-28) =================================== * setup.py compiles dialog.ui to dialog_ui.py and installs hachoir-metadata-qt. Create --disable-qt option to skip hachoir-metadata-qt installation. * Create a MANIFEST.in file to include extra files like ChangeLog, AUTHORS, gnome and kde subdirectories, test_doc.py, etc. hachoir-metadata 1.3 (2010-01-20) ================================= * Create hachoir-metadata-qt: a graphical interface (Qt toolkit) to display files metadata * Create ISO9660 extractor * Hide Hachoir warnings by default (use --verbose to show them) * hachoir-metadata program: create --force-parser option to choose the parser hachoir-metadata 1.2.1 (2008-10-16) =================================== * Using --raw, strings are not normalized (don't strip trailing space, new line, nul byte, etc.) * Extract much more information from Microsoft Office documents (.doc, .xls, .pps, etc.) 
* Improve OLE2 (Word) extractor * Fix ASF extractor for hachoir-parser 1.2.1 hachoir-metadata 1.2 (2008-09-03) ================================= * Create --maxlen option for hachoir-metadata program: --maxlen=0 disables the arbitrary string length limit * Create FLAC metadata extractor * Create hachoir_metadata.config, especially MAX_STR_LENGTH option (maximum string length) * GIF image may contain multiple comments hachoir-metadata 1.1 (2008-04-01) ================================= * More extractors are more stable and fault tolerant * Create basic Gtk+ GUI: hachoir-metadata-gtk * Catch error on data conversion * Read width and height DPI for most image formats * JPEG (EXIF): read GPS information * Each data item can have its own "setter" * Add more ID3 keys (TCOP, TDAT, TRDA, TORY, TIT1) * Create datetime filter supporting timezone * Add "meters", "pixels", "DPI" suffix for human display * Create SWF extractor * RIFF: also read information from headers field, compute audio compression rate * MOV: read width and height * ASF: read album artist hachoir-metadata 1.0.1 (???) 
============================ * Only use hachoir_core.profiler with --profiler command line option so 'profiler' Python module is now optional * Set shebang to "#!/usr/bin/python" hachoir-metadata 1.0 (2007-07-11) ================================= * Real audio: read number of channel, bit rate, sample rate and compute compression rate * JPEG: Read user commment * Windows ANI: Read frame rate * Use Language from hachoir_core to store language from ID3 and MKV * OLE2 and FLV: Extractors are now fault tolerant Platform: UNKNOWN Classifier: Intended Audience :: Developers Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console :: Curses Classifier: Topic :: Multimedia Classifier: License :: OSI Approved :: GNU General Public License (GPL) Classifier: Operating System :: OS Independent Classifier: Natural Language :: English Classifier: Programming Language :: Python hachoir-metadata-1.3.3/hachoir-metadata-qt0000755000175000017500000001306711251277274017515 0ustar haypohaypo#!/usr/bin/env python from hachoir_metadata.qt.dialog_ui import Ui_Form from PyQt4.QtCore import SIGNAL from PyQt4.QtCore import Qt from PyQt4.QtGui import (QApplication, QDialog, QTableWidgetItem, QFileDialog, QMessageBox) from sys import argv, exit from os.path import basename, dirname from hachoir_core import config config.quiet = True from hachoir_core.error import HACHOIR_ERRORS from hachoir_core.cmd_line import unicodeFilename from hachoir_parser import createParser from hachoir_core.tools import makePrintable from hachoir_metadata import extractMetadata from hachoir_metadata.metadata import MultipleMetadata from hachoir_core.i18n import initLocale, getTerminalCharset from sys import argv, stderr, exit from os import walk, getcwd from os.path import join as path_join from fnmatch import fnmatch from datetime import datetime import codecs class CustomTableWidgetItem(QTableWidgetItem): def __init__(self, parent = None): super(CustomTableWidgetItem, 
self).__init__(parent) self.setFlags(Qt.ItemIsSelectable |Qt.ItemIsEnabled) class MetadataError(Exception): def __init__(self, message): self.unicode_message = message bytes_message = message.encode("ASCII", "replace") Exception.__init__(self, bytes_message) def __unicode__(self): return self.unicode_message class File: def __init__(self, filename, realname): self.name = filename if realname: self.realname = realname else: self.realname = filename try: self.parser = createParser(self.name, self.realname) except HACHOIR_ERRORS, err: raise MetadataError("Parser error: %s" % unicode(err)) if not self.parser: raise MetadataError("Unable to parse the file: %s" % self.name) try: self.metadata = extractMetadata(self.parser) except HACHOIR_ERRORS, err: raise MetadataError("Metadata extraction error: %s" % unicode(err)) if not self.metadata: file_type = self.parser.mime_type raise MetadataError("Unable to extract metadata from file of type %s" % file_type) class Metadata(QDialog, Ui_Form): def __init__(self, application): QDialog.__init__(self) self.application = application self.files = {} self.setupWindow() self.current_dir = dirname(getcwd()) def setupWindow(self): self.setupUi(self) self.connect(self.open_button, SIGNAL("clicked()"), self.open) self.connect(self.quit_button, SIGNAL("clicked()"), self.quit) self.connect( self.files_combo, SIGNAL("currentIndexChanged(const QString&)"), self.changeFile) self.metadata_table.horizontalHeader().hide() self.metadata_table.verticalHeader().hide() def open(self): filename = QFileDialog.getOpenFileName( self, #parent "Choose a file to open", #caption self.current_dir, #dir name "", #filter "Any type" #filter set ) if not filename: return filename = unicode(filename) self.current_dir = dirname(filename) self.addFile(filename, change=True) def quit(self): self.application.quit() def fillList(self, file): table = self.metadata_table metadata = file.metadata groups = [metadata] if isinstance(metadata, MultipleMetadata): 
groups.extend(list(metadata.iterGroups())) total = 0 for index, metadata in enumerate(groups): group_name = metadata.header metadata = [data for data in metadata if data.values] metadata.sort() if 0 < index: metadata.insert(0, group_name) groups[index] = metadata total += len(metadata) table.clear() table.setColumnCount(2) table.setRowCount(total) row = 0 for metadata in groups: for data in metadata: if isinstance(data, (str, unicode)): table.setItem(row, 0, CustomTableWidgetItem("-- group --")) table.setItem(row, 1, CustomTableWidgetItem(data)) row += 1 continue title = data.description for item in data.values: value = item.text table.setItem(row, 0, CustomTableWidgetItem(title)) table.setItem(row, 1, CustomTableWidgetItem(value)) row += 1 table.resizeColumnsToContents() def changeFile(self, name): name = unicode(name) file = self.files[name] self.fillList(file) def error(self, message): QMessageBox.warning(self, u"Metadata error", message) def addFile(self, filename, realname=None, change=False): try: file = File(filename, realname) except MetadataError, err: errmsg = unicode(err) self.error(errmsg) return name = basename(file.name) self.files[name] = file self.files_combo.addItem(name) if change: index = self.files_combo.count() - 1 self.files_combo.setCurrentIndex(index) def main(): app = QApplication(argv) metadata = Metadata(app) for filename in argv[1:]: realname = filename filename = unicodeFilename(filename) metadata.addFile(filename, realname) metadata.show() exitcode = app.exec_() exit(exitcode) if __name__ == "__main__": main() hachoir-metadata-1.3.3/kde/0000755000175000017500000000000011423160161014471 5ustar haypohaypohachoir-metadata-1.3.3/kde/hachoir.desktop0000644000175000017500000000120311251277274017512 0ustar haypohaypo# hachoir.desktop, by Jean-Marc Libs # [Desktop Entry] ServiceTypes=all/allfiles TryExec=kdialog TryExec=hachoir-metadata Actions=Hachoir X-KDE-Priority=TopLevel [Desktop Action Hachoir] Name=Hachoir metadata Icon=info 
#Exec=kdialog --passivepopup "`hachoir-metadata %F`" 30 --title "%N" --caption "%c" # Other possibility: Exec=echo " " > /tmp/hachoir.tmp || exit; hachoir-metadata-kde %F > /tmp/hachoir.tmp; kdialog --textbox /tmp/hachoir.tmp 500 600 --title "%N" --caption "%c"; unlink /tmp/hachoir.tmp #Exec=kdialog --msgbox "`hachoir-metadata %f`" --title "%n" --caption "%c" Name[fr]=Méta-données hachoir Name[de]=Hachoir-Metadaten hachoir-metadata-1.3.3/kde/README0000644000175000017500000000124511251277274015370 0ustar haypohaypoUsage ===== The hachoir.desktop file goes in: /usr/share/apps/konqueror/servicemenus The hachoir-metadata-kde goes in: /usr/bin (or anywhere in the PATH) Effect ====== When right-clicking on any file in konqueror, produces a "hachoir metadata" entry under "Actions" menu. This produces a pop-up which displays the file's metadata. Optional ======== Replace Exec=kdialog --passivepopup "`hachoir-metadata %F`" 30 --title "%N" --caption "%c" with Exec=echo " " > /tmp/hachoir.tmp || exit; hachoir-metadata-kde %F > /tmp/hachoir.tmp; kdialog --textbox /tmp/hachoir.tmp 500 600 --title "%N" --caption "%c"; unlink /tmp/hachoir.tmp for a real kdialog pop-up hachoir-metadata-1.3.3/kde/hachoir-metadata-kde0000755000175000017500000000013011251277274020362 0ustar haypohaypo
#!/bin/bash
# For every file given on the command line, print a "File: '<name>'"
# header, the output of hachoir-metadata for it, then a separator line
# holding a single space.
for path in "$@"; do
    echo "File: '$path'"
    hachoir-metadata "$path"
    echo " "
done