doclifter-2.11/0000775000175000017500000000000012152465736011571 5ustar esresrdoclifter-2.11/manlifter0000775000175000017500000006335312152465736013512 0ustar esresr#!/usr/bin/python -u # # Run doclifter against an entire manual tree. # Sees all files in section 1 through 8 by default. import sys, os, getopt, signal, time, re, commands, cStringIO, stat import hotshot, hotshot.stats mandir = "/usr/share/man" patchdir = os.path.abspath("prepatch") outdir = None patched = 0 makehtml = False xslfragment = None processed = set([]) excluded_files = [] def manfile(section, basename=""): "Return a manual file or directory based on section name." if not basename: # Return the directory return "%s/man%s/" % (mandir, section) elif basename[0] == '/': return basename elif basename.endswith(".gz") or basename.endswith(".bz2") or basename.endswith(".Z"): # We've been handed an actual filename return "%s/man%s/%s" % (mandir, section, basename) else: # We've been handed a filename section return "%s/man%s/%s.%s.gz" % (mandir, section[:1], basename, section) def analyze_manpage(manpage): "Provide log annotations based on content." exclusions = ( ("", "This page is HTML"), ("auto-generated by docbook2man-spec", "DocBook"), ("automatically generated by docbook2man", "DocBook"), ("Generated by db2man.xsl", "XML DocBook"), ("Automatically generated by Pod::Man", "Pod::Man"), ("Man page generated from reStructeredText", "reStructuredText"), ("Man page generated from reStructuredText", "reStructuredText"), ("Generator: DocBook XSL Stylesheets", "DocBook stylesheets"), ("Generated by docutils manpage writer", "docutils"), ("DocBook SGML with docbook-to-man", "DocBook SGML"), ("Doxygen", "Doxygen"), ) output = "" fp = open(manpage) text = fp.read() for (pattern, generator) in exclusions: if text.find(pattern) > -1: output += "Generated from %s\n" % generator fp.close() return output def fetch_page(file, localcopy, patch): "Grab a local copy of a man page, patching if needed." output = "" if file[-3:] == ".gz": cstat = os.system("gunzip <%s >%s" % (file, localcopy)) elif file[-4:] == ".bz2": cstat = os.system("bunzip2 <%s >%s" % (file, localcopy)) elif file[-2:] == ".Z": cstat = os.system("uncompress <%s >%s" % (file, localcopy)) else: cstat = os.system("cp %s %s" % (file, localcopy)) if os.WIFSIGNALED(cstat) or os.WEXITSTATUS(cstat): return (1, output + "manlifter: copy failed, status %d", cstat) if os.path.exists(patch): here = os.getcwd() os.chdir(outdir) patch = commands.getoutput("patch --version-control=never <%s" % (patch,)) stem = os.path.basename(localcopy) os.system("rm -f %s.orig %s.rej" % (stem, stem)) os.chdir(here) if patch: output += patch + "\n" return (0, output) def getstem(file): "Reduce the name of a man page or generated HTML file to its stem" if file.endswith(".xml"): file = file[:-4] file = ".".join(file.split(".")[:-1]) # Remove section return file def make_xml(source, options): "Make XML from specified man page." 
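    # Runs doclifter over one groff source file and returns a
    # (status, include_target, log) triple.  An exit status of 2 from
    # doclifter means the page is a ".so" inclusion stub; in that case the
    # stub is searched for its .so target and the matching .xml path under
    # outdir is returned so the caller can symlink to it instead of
    # retranslating the included page.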
(doclifter_status, output) = commands.getstatusoutput("doclifter -I %s %s %s" % (mandir, options, source)) if output: output += "\n" if os.WIFEXITED(doclifter_status): doclifter_status = os.WEXITSTATUS(doclifter_status) else: # Should never happen raise ValueError lxmlloc = None if doclifter_status == 2: fp = open(source) contents = fp.read() inclusions = re.compile(r"\.so\s+(.*)").search(contents) fp.close() if inclusions: lxmlloc = os.path.join(outdir, getstem(inclusions.group(1)) + ".xml") return(2, lxmlloc, output) return (doclifter_status, None, output) def validate(translation): "Validate an XML file produced by translation." output = "" # If it has entity inclusions it won't validate, so don't try. # This is only a good idea because man pages that have these are # usually trivial wrappers like builtins.1 try: fp = open(translation) text = fp.read() inclusions = re.compile("").search(text) equation = "/dev/null" % translation) if validate_out: output += validate_out + "\n" if os.WIFSIGNALED(bstat): output += "Bailing out of xmllint...\n" return (-1, output) xmllint_error_status = os.WEXITSTATUS(bstat) if xmllint_error_status: output += "xmllint error status:%s\n" % os.WEXITSTATUS(bstat) if xmllint_error_status: return (6, output) return (0, output) def format(translation, fmt, xslfragment): "Format an XML file to a specified format." output = "" here = os.getcwd() os.chdir(os.path.dirname(translation)) if xslfragment: command = "xmlto %s %s" % (fmt, os.path.basename(translation)) else: command = "xmlto -m %s %s %s" % (xslfragment, fmt, os.path.basename(translation)) (bstat, format_out) = commands.getstatusoutput(command) os.chdir(here) if format_out: output += format_out + "\n" if os.WIFSIGNALED(bstat): output += "Bailing out of %s formatting...\n" % fmt return (-1, output) format_error_status = os.WEXITSTATUS(bstat) if format_error_status: output += "format error status:%s\n" % os.WEXITSTATUS(bstat) if format_error_status: return (6, output) return (0, output) def deploy(source, target): try: os.rename(source, target) except OSError, e: return(3, "Rename of %s to %s failed, errno = %d" % (source, target, e.errno,)) return (0, "") def makelink(source, target): try: os.symlink(os.path.abspath(source), os.path.abspath(target)) except OSError: pass def singlerun(file, options, tmpstem="foo"+`os.getpid()`, batchmode=False): "Test-format a single file." 
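    # Converts (and optionally validates and HTML-formats) one manual page.
    # Returns a (status, foundpatch, log) triple: status follows the legend
    # in statistics() below (0 OK, 2 ".so" stub, 6 validation failure,
    # 7 page already generated from DocBook or Doxygen, and so on).
    # foundpatch is True when a matching patch was found in the patch
    # directory (prepatch by default) and applied before translation.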
global patched foundpatch = False if not os.path.exists(file): return (0, False, "") output = "" if file[-3:] == ".gz": withsect = os.path.basename(file)[:-3] elif file[-4:] == ".bz2": withsect = os.path.basename(file)[:-4] elif file[-2:] == ".Z": withsect = os.path.basename(file)[:-2] else: withsect = os.path.basename(file) dot = withsect.rindex(".") section = withsect[dot+1:dot+2] subdir = os.path.join(outdir, "man" + section) stem = getstem(withsect) xmlloc = os.path.join(subdir, stem + ".xml") # Count patches here so our stats won't be off patch = os.path.join(patchdir, withsect + ".patch") if os.path.exists(patch): patched += 1 foundpatch = True try: global processed tmpstem = os.path.join(outdir, tmpstem) source = tmpstem + ".man" # Grab the actual manual page localcopy = os.path.join(outdir, withsect) (status, output) = fetch_page(file, localcopy, patch) if (status): return (status, False, output) # Save work by doing conversions only as needed analysis = analyze_manpage(localcopy) rebuild_xml = True if batchmode and os.path.exists(xmlloc): if os.stat(file).st_mtime < os.lstat(xmlloc).st_mtime: output += "XML conversion is up to date.\n" processed.discard(withsect) rebuild_xml = False if batchmode and "DocBook" in analysis: output += "Made from DocBook masters.\n" processed.discard(withsect) return (7, False, output) if batchmode and "Doxygen" in analysis: output += "Made by Doxygen.\n" processed.discard(withsect) return (7, False, output) htmlloc = os.path.join(subdir, stem + ".html") if rebuild_xml: # Note the the patch was used processed.discard(withsect) # Add any annotations output += analysis # Save the location of the page loc = tmpstem + ".loc" lfp = open(loc, "w") lfp.write(withsect) lfp.close() # Move the source file into the output directory os.rename(localcopy, source) # Run the translator (doclifter_status, lxmlloc, note) = make_xml(source, options) output += note if doclifter_status not in (0, 2): if not batchmode: output += "doclifter error status: %s\n" % doclifter_status return (doclifter_status, foundpatch, output) translation = tmpstem + ".man.xml" # Warn about FIX-ME problems output += commands.getoutput("grep FIX-ME " + translation + " 2>/dev/null") # If the translation went through, cleaning up consists # of putting this in its permanent location. 
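        # In batch mode the finished translation is deployed to
        # <outdir>/man<section>/<stem>.xml; a ".so" stub becomes a symlink
        # to the XML (and HTML) of the page it includes, and HTML is
        # regenerated only when the .xml file is newer than an existing
        # .html file.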
try: # This will foo up if we ever have to symlink between dirs if batchmode and not os.path.exists(subdir): os.mkdir(subdir) except OSError, e: return(3, foundpatch, output + "Creation of %s failed, errno = %d\n"%(subdir,e.errno)) if doclifter_status == 2: makelink(lxmlloc, xmlloc) if doclifter_status == 0: if not makehtml: (status, more) = validate(translation) output += more if batchmode and status: os.remove(translation) try: os.remove(htmlloc) except OSError: pass return (status, foundpatch, output) if batchmode: (status, more) = deploy(translation, xmlloc) translation = xmlloc output += more if status: return (status, foundpatch, output) # Save work by doing HTML conversions only as needed rebuild_html = makehtml if batchmode and os.path.exists(htmlloc): if os.stat(xmlloc).st_mtime < os.lstat(htmlloc).st_mtime: output += "HTML conversion is up to date\n" rebuild_html = False if rebuild_html: if batchmode: htmlloc = os.path.join(subdir, stem + ".html") else: htmlloc = stem + ".html" if batchmode and stat.S_ISLNK(os.lstat(xmlloc).st_mode): makelink(os.readlink(xmlloc)[:-4]+".html", htmlloc) else: (status, more) = format(translation, "xhtml-nochunks", xslfragment) output += more if status: if batchmode: os.remove(xmlloc) try: os.remove(htmlloc) except OSError: pass return (status, foundpatch, output) finally: # Clean up if batchmode: if os.path.exists(source): os.remove(source) return (0, foundpatch, output) def sectionfiles(sections): "Generate files corresponding to a list of sections." files = [] for section in sections: files = files + map(lambda f: manfile(section, f), os.listdir(manfile(section))) files.sort() return files total = eligible = starttime = 0 def report_elapsed(elapsed): "Report elapsed time in friendly format." return "%02dh:%02dm:%02ds" % (elapsed/3600, (elapsed % 3600)/60, elapsed % 60) def massrun(files, options, profiling): "Test against all files in specified sections." def bailout(signum, frame): print "\nBailing out with signal %d..." % signum os.system("rm -f doclifter_test%s.py doclifter_test%s.py[co]" % (os.getpid(), os.getpid())) sys.exit(0) global total, eligible, starttime total = 0 starttime = int(time.time()) eligible = len(files) doclifter_error_count = xmllint_error_count = docbook_count = total = 0 def report(sig, frame, out=sys.stderr): ftotal = float(total) elapsed = int(time.time()) - starttime out.write("\n%%%d of %d files in %s, %d OK, %d preconverted, %d patched, %d doclifter errors, %d validation failures, %2.2f%% good.\n" % \ (total, eligible, report_elapsed(elapsed), (total - doclifter_error_count - xmllint_error_count), docbook_count, patched, doclifter_error_count, xmllint_error_count, (ftotal-doclifter_error_count-xmllint_error_count-patched)*100.0/ftotal)) def test(file, options): before = time.time() (status, patched, output) = singlerun(file=file, options=options, batchmode=True) after = time.time() sys.stdout.write("! %s=%d%s (%2.2f)\n%s\n" % (file, status, " *"[patched], after-before, output)) return (status, output) signal.signal(signal.SIGUSR2, report) signal.signal(signal.SIGHUP, bailout) signal.signal(signal.SIGINT, bailout) signal.signal(signal.SIGPWR, bailout) signal.signal(signal.SIGTERM, bailout) print "%Test started", time.ctime() if profiling: print "%Profiling enabled.\n" else: print "%Profiling not enabled.\n" try: for file in files: if file in excluded_files: continue (status, output) = test(file=file, options=options) if status == -1: break elif status in (1, 4): # Doclifter parse or internal error. 
doclifter_error_count += 1 elif status == 2: # .so inclusion pass elif status in (3, 5): # File I/O error or keyboard interrupt pass elif status == 6: # Validation failure xmllint_error_count += 1 elif status == 7: docbook_count += 1 total = total + 1 except KeyboardInterrupt: pass report(0, sys.stdout) htmlheader = ''' Manlifter contents page ''' htmltrailer = "\n\n" def genindex(ofp): # Collect all section/name/description triples filelist = [] section_re = re.compile("/man([^/]*)") extract_re = re.compile("([^<]*)") section_dict = {} for (root, dirs, files) in os.walk('xmlman'): for file in files: try: if not file.endswith(".xml"): continue # Extract the manual section m = section_re.search(root) if m: section = m.group(1) else: continue section_dict[section] = [] # Extract the manual page name name = ".".join(file.split(".")[:-1]) # Extract the description file = os.path.join(root, file) fp = open(file) contents = fp.read() fp.close() m = extract_re.search(contents) if m: description = m.group(1) else: description = "(no description)" # Build an index entry filelist.append((section, name, description)) except IOError: pass filelist.sort() # In case the directory was pieced together by several runs for (section, name, description) in filelist: section_dict[section].append((name, description)) keys = section_dict.keys() keys.sort() for section in keys: ofp.write(htmlheader) ofp.write("

<h2>%s:</h2>\n<table>\n" % section)
        for (name, description) in section_dict[section]:
            ofp.write("<tr><td><a href='man%s/%s.html'>%s</a></td><td>%s</td></tr>\n"
                      % (section, name, name, description))
        ofp.write("</table>
\n") ofp.write(htmltrailer) def statistics(): legends = ( "OK ", # No error "???", # Unliftable (normal error status) ".so", # failure due to inclusion "I/O", # I/O failure, could not reach page "!!!", # Internal error, doclifter blew up "^C ", # Translation interrupted "XML", # XML validation failure "NOP", # Already in DocBook ) counts = [0] * len(legends) patchcount = re.compile("([0-9]+) patched") warnings = 0 warn_latch = False while True: line = sys.stdin.readline() if not line: break elif not line.strip(): if warn_latch: warnings += 1 continue m = patchcount.search(line) if m: patched = int(m.group(1)) if "warning -" in line: warn_latch = True if line[0] != '!': continue warn_latch = False line = line[2:] rcolon = line.rindex("=") file = line[:rcolon] retval = line[rcolon+1:].split()[0] if retval.endswith("*"): retval = retval[:-1] if file.endswith(".gz"): file = file[:-3] elif file.endswith(".bz2"): file = file[:-4] elif file.endswith(".Z"): file = file[:-2] file = os.path.basename(file) counts[int(retval)] += 1 total = sum(counts) for (i, count) in enumerate(counts): print "%d = %s: %5d %2.2f%%" % (i, legends[i], count, (count * 1.0)*100/total) good = counts[0] bad = sum(counts[1:7]) print "Total: %d Errors: %d Warnings: %d" % (total, bad, warnings) print "Patched: %d (%2.2f%%)" % (patched, patched*100/float(total)) print "With patches: %d (%2.2f%%)" % (good, good*100/float(total)) print "Without patches: %d (%2.2f%%)" % (good-patched, (good-patched)*100/float(total)) def errorclean(error_only, pattern): if pattern: pattern = re.compile(pattern) pagename = re.compile(r"! (.*)=([0-9]+)") while 1: header = sys.stdin.readline() if not header: break # Look for a log leader m = pagename.search(header) if not m: continue subject = m.group(1) status = int(m.group(2)) # Collect following error messages up to a blank line trailer = '' while 1: line = sys.stdin.readline() trailer += line if not line or not line.strip(): break if pattern: # Emit by pattern if pattern.search(trailer): sys.stdout.write(subject+"\n") else: # Emit some of them by status def matches(s): return trailer.find(s) > -1 if status == 0 and not matches("warning"): continue if status == 1 and (matches("page is empty") or matches("page has no text")): continue if status in (2, 7): continue # Otherwise, emit if error_only: print subject else: sys.stdout.write(header + trailer) def patchman(stem="foobar"): "Make a patch against the last page lifted." if not os.path.exists(stem + ".man"): sys.stderr.write("manlifter: no trial page waiting.\n") raise SystemExit, 1 if not os.path.exists(stem + ".loc"): sys.stderr.write("manlifter: no saved page location.\n") raise SystemExit, 1 # Retrieve the location of the last page lfp = open(stem + ".loc") withsect = lfp.read() lfp.close() # Fail if patch already exists patch = os.path.join(patchdir, withsect + ".patch") if os.path.exists(patch): sys.stderr.write("manlifter: patch for %s already exists.\n" % withsect) raise SystemExit, 1 # Make copies for patching and do it trialpage = stem + ".man" unpatched = withsect + "-unpatched" try: os.system("cp %s %s" % (trialpage, unpatched)) os.system("cp %s %s" % (trialpage, withsect)) if os.system(os.getenv("EDITOR") + " " + withsect) == 0: os.system("diff -u %s %s >%s" % (unpatched, withsect, patch)) finally: os.system("rm -f %s %s %s" % (withsect, withsect + "~", unpatched)) citereftemplate = ''' %s /man / .html ''' def doclifter_driver(options, arguments): "Lift old markup to new." 
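    # citereftemplate above is written out as citerefentry.xsl under the
    # output directory and handed to xmlto via its -m option when HTML is
    # generated, so that citerefentry cross-references in the lifted XML
    # become hyperlinks of the form <outdir>/man<N>/<name>.html.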
global mandir, makehtml, outdir, xslfragment, patchdir, makepatch, excluded_files filelist = [] sections = [] callopts = "" patchlift = False makehtml = False errorfilter = False quiet = False fval = None makepatch = False profiling = False excluded_files = [] for (switch, val) in options: if (switch == '-d'): callopts += " -d " + val elif (switch == '-e'): errorfilter = True elif (switch == '-f'): # Translate files in the specified list fval = val elif (switch == '-h'): makehtml = True elif (switch == '-I'): # Specify the root of the manual hierarchy mandir = val elif (switch == '-m'): # Make a patch from the last fetched page makepatch = True elif (switch == '-M'): # Make a patch with specified page patchlift = True elif (switch == '-p'): # Specify patch directory patchdir = os.path.abspath(val) elif (switch == '-P'): profiling = True elif (switch in ("-q", '-v', '-w')): # Set verbosity level quiet = True callopts += " " + switch elif (switch == '-s'): # Specify search list of sections sections.append(val) elif (switch == '-S'): # Generate statistics from log on stdin statistics() sys.exit(0) elif (switch == '-X'): excluded_files = open(val).read().split() if not sections: sections = ["1", "2", "3", "4", "5", "6", "7", "8"] if not outdir: if not arguments: outdir = 'xmlman' else: outdir = '.' # Clean/create the output directory if not arguments: if not os.path.exists(outdir): os.mkdir(outdir) # Create XSL fragment for making refentries into links xslfragment = os.path.abspath(os.path.join(outdir, "citerefentry.xsl")) fp = open(xslfragment, "w") fp.write(citereftemplate % outdir) fp.close() try: # Process args, if present if arguments: for file in arguments: for section in sections: manpage = manfile(section, file) print "Trying", manpage if os.path.exists(manpage): (status, patched, output) = singlerun(manpage, callopts, "foobar", batchmode=False) print output break if patchlift: patchman() elif makepatch: patchman() elif errorfilter: errorclean(quiet, fval) elif fval: fp = open(fval) filelist = map(lambda x: x.rstrip(), fp.readlines()) fp.close() massrun(filelist, callopts, profiling) else: global processed processed = set([]) if os.path.exists(patchdir): processed = set(map(lambda x: x.replace(".patch", "").replace(".correction", ""), os.listdir(patchdir))) massrun(sectionfiles(sections), callopts, profiling) if processed: print "%% %d patches not used:" % len(processed) for file in processed: print file finally: pass #os.remove(xslfragment) # Now, rebuild the index page if makehtml: fp = open(os.path.join(outdir, "index.html"), "w") genindex(fp) fp.close() if __name__ == "__main__": # Find a copy of doclifter for pathdir in ["."] + os.environ["PATH"].split(":"): where = os.path.join(pathdir, "doclifter") if os.path.exists(where): break else: sys.stderr.write("manlifter: can't find doclifter!\n") sys.exit(1) # Gather options (options, arguments) = getopt.getopt(sys.argv[1:], "d:ef:hI:mMp:Pqs:SvwX:") # Do the real work if "-P" in sys.argv: prof = hotshot.Profile("manlifter.prof") prof.runcall(doclifter_driver, options, arguments) prof.close() starttime = time.time() print "% Digesting profile results...", stats = hotshot.stats.load("manlifter.prof") stats.sort_stats('time', 'calls') print "took %s." 
% report_elapsed(time.time() - starttime) stats.print_stats(30) else: doclifter_driver(options, arguments) # End doclifter-2.11/manlifter.xml0000664000175000017500000002304312152465736014276 0ustar esresr manlifter 1 Sun Nov 28 2004 manlifter manlifter Documentation Tools manlifter mass-conversion script and test harness for doclifter manlifter -d option -e -f listfile -h -I mandir -m -M -o outdir -p patch-directory -P -q -v -s section -X exclude name manlifter -S Description manlifter is a script that sequences doclifter1 to convert an entire manual-page tree to XML-Docbook, optionally also generating HTML from the XML. Another use is as a torture-test tool for doclifter; it logs errors to standard output and collects timings. Called without any file arguments, manlifter tries to convert all eligible man pages installed on the system, placing the resulting xml files under xmlman in the current directory. Each successfully translated page foo.N is copied to manN/foo.xml beneath the output directory, regardless of what source directory it came from. A manual page is considered ineligible for batch conversion if it contains text indicating it has been generated from DocBook masters of from Doxygen. For each source file examined, if the destination file exists and is newer than the source, the conversion is skipped; thus, incremental runs of manlifter do the least work needed to keep the target XML tree up to date. Likewise, in -h mode derived HTML files are only made when necessary. Stub pages that are just .so redirections are translated to corresponding symlinks of XML files (and, with -h, HTML files). manlifter may also be called with a single file argument, which is interpreted as the stem name of a potential manual page. manlifter then searches all selected manual sections for a matching page and attempts to convert it. In this case, a copy of the man page and the converted version are dropped immediately beheath the output directory, with the names foobar.man and foobar.man.xml, respectively. This mode is normally only of interest only to doclifter developers for debugging that program. In either of the above cases, manlifter will uncompress the file if it has a .gz, .bz2 or .Z suffix on the name. Options are as follows: -d Pass the string argument to each doclifter call as options. Each space-separated token in the string becomes a separate argument in the call. -e Run in log-filter mode (mainly of interest to doclifter developers). In this mode, manlifter reads a test log from standard input and filters it in a a way dependent on the -f and -q options. If neither of these is given, messages from successful runs are stripped out and only errors passed through to standard output. -f Normally, run doclifter on the files named by each line in the argument file. In error-filter mode the argument is instead interpreted as a filtering regular expression. -h Also generate HTML translations into the output directory. DocBook citerefentry markup is transformed to hyperlinks in the directory, and a contents listing is generated to index.html. -I Specify the root of the manual-page tree. By default this is /usr/share/man. -m Make a patch to correct the last page fetched. It is copied, an editor is called on the copy (using the environment variable $EDITOR), and then diff1 is called to drop the patch in the prepatch directory. Fails with an error if such a patch is already present. -M Lift the specified files, then do the equivalent of the -m option. 
-o Set the output directory into which XML-DocBook translations will be dropped. By default this is xmlman under the current directory in batch mode, or the current directory otherwise. -p Interpret the argument as the name of a patch directory (the default name is prepatch under the current directory). Each file named foo.N.patch is interpreted as a patch to be applied to the manual page foo(N) before doclifter translates it. -P Enable profiling using the Python hotshot module; this is only useful for tuning doclifter so it runs faster. Raw data is written to manlifter.prof, and a digested report is appended to the log on standard output. Warning: the raw data files can become huge, and the postprocessing for report generation can take as long as the actual processing (or longer!). -q Normally, pass the -q (quiet) option to each doclifter call. In error-filter mode, return a list of files on which translation failed. -v Pass the -v (verbose) option to each doclifter call. This option can be repeated to increase the verbosity level. -s Specify a section to scan. Use this with an argument; it should not be necessary when doing a conversion of the entire tree. -S Compile error statistics from a manlifter logfile presented on standard input. This option will be of interest mainly to doclifter developers. -X In batch mode exclude pages listed in the argument file. Meant to be used for pages that are known good and take an extremely long time to lift, in order to cut down the time for a test run. (Most pages lift in less than a half second, but a few can take 15 minutes or longer.) manlifter emits a logfile to standard output. The file begins with a timestamp line and a blank line, and ends with a line giving run time and various interesting statistics. Between these are stanzas, separated by blank lines, one for each file on which doclifter was run. The first line of each stanza beguns with "! ", followed by the pathname of the source manual pager, followed by "=" and the return status of doclifter run on that file. Following that is a space and doclifter's runtime in seconds. This initial line may be followed by information messages and the error output of the doclifter run. manlifter must find a copy of doclifter in either the current directory or one of the command directories in your PATH in order to run. Bugs HTML generation is painfully slow. Unfortunately, there is little we can do to remedy this, because XSLT engines are painfully slow. See Also doclifter1, xmlto1 Author Eric S. Raymond esr@thyrsus.com There is a project web page at http://www.catb.org/~esr/doclifter/. doclifter-2.11/COPYING0000664000175000017500000000274512152465736012634 0ustar esresr COPYRIGHTS Copyright (c) 2002, 2003, 2007, and 2010 by Eric S. Raymond. BSD LICENSE Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

Neither name of the doclifter project nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. doclifter-2.11/PATCHES0000664000175000017500000014064212152465736012612 0ustar esresr# All known problems with the manual pages in a desktop Ubuntu installation # # Send 1 was on 09 Dec 2003 # Send 2 was on 17 Feb 2004 # Send 3 was on 11 Jul 2004 # Send 4 was on 20 Nov 2004 # Send 5 was on 14 Jan 2005 # Send 6 was on 01 Jan 2007 # A Dot or single-quote at start of line turns it into a garbage command. This is a serious error; some lines of your page get silently lost when it is formatted. B Bogus macro definition. C Broken command synopsis syntax. This may mean you're using a construction in the command synopsis other than the standard [ ] | { }, or it may mean you have running text in the command synopsis section (the latter is not technically an error, but most cases of it are impossible to translate into DocBook markup), or it may mean the command syntax fails to match the description. D Non-break space prevents doclifter from incorrectly interpreting "Feature Test" as end of function synopsis. E My translator trips over a useless command in list markup. F Non-English-language page incorrectly installed. G Spurious trailing .CE H Renaming SYNOPSIS because either (a) third-party viewers and translators will try to interpret it as a command synopsis and become confused, or (b) it actually needs to be named "SYNOPSIS" with no modifier for function protoypes to be properly recognized. I Use of low-level troff hackery to set special indents or breaks can't be translated. The page will have rendering faults in HTML, and probably also under third-party man page browsers such as Xman, Rosetta, and the KDE help browser. This patch eliminates .br, .ta, .ti, .ce, .in, and \h in favor of requests like .RS/.RE that have structural translations. J Ambiguous or invalid backslash. This doesn't cause groff a problem. but it confuses doclifter and may confuse older troff implementations. K Renaming stock man macros throws warnings in doclifter and is likely to cause failures on third-party manual browsers. Please redo this page so it uses distinct names for the custom macros. L List syntax error. This means .IP, .TP or .RS/.RE markup is garbled. Common causes include .TP just before a section header, .TP entries with tags but no bodies, and mandoc lists with no trailing .El. These confuse doclifter, and may also mess up stricter man-page browsers like Xman and Rosetta. M Feature test macros (running text) included in a function synopsis prevents translation to DocBook. N Extraneous . at start of line. O Wrong order of arguments in .Dd macro. Q Spelling error or typo. 
R .ce markup can't be structurally translated, and is likely to cause rendering flaws in generated HTML.
S DEPRECATED: in function syntax cannot be translated. Also, the code and examples need to be marked up better.
T Junk at the beginning of the manual page.
U Unbalanced group in command synopsis. You probably forgot to open or close a [ ] or { } group properly.
V Missing body content in list trips up doclifter and is likely to cause rendering problems in other viewers. I have been able to fill in what was missing except for what should be under TAR_LONGLINK_100.
W Missing or garbled name section. The most common form of garbling is a missing - or extra -. Or your manual page may have been generated by a tool that doesn't emit a NAME section as it should. Or your page may add running text such as a version or authorship banner. These problems make it impossible to lift the page to DocBook. They can also confuse third-party manpage browsers and some implementations of man -k.
X Unknown or invalid macro. That is, one that does not fit in the macro set that the man page seems to be using. This is a serious error; it often means part of your text is being lost or rendered incorrectly.
Y I have been unable to identify an upstream maintainer for this Ubuntu/Debian package, and am notifying the generic "Maintainer" address in the package. Please forward appropriately. Also fix the package metadata so it identifies the upstream maintainers.
Z Your Synopsis is exceptionally creative. Unfortunately, that means it cannot be translated to structural markup even when things like running-text inclusions have been moved elsewhere.
a ".fi" request was omitted or typoed as ".if".
b Attempt to interpolate unknown string.
c The composer of this man page misunderstood and seriously overused the \c escape. Some uses were broken; others (notably the sequence "\\c\n\\&") are bad style.
d .eo/.ec and complex tab-stop hackery can't be translated to XML/HTML and are almost certain to confuse third-party readers such as Rosetta and Xman.
e Macro definitions in the NAME section confuse doclifter and are likely to screw up third-party man viewers with their own parsers.
f Absence of trailing \fRs makes synopsis unparseable.
g Use of a double quote for inch measurements often confuses people who aren't from the Anglosphere.
i Non-ASCII character in document synopsis can't be parsed.
j Parenthesized comments in command synopsis. This is impossible to translate to DocBook.
k kdemangen.pl stuttered two copies of a page. Also, .SS markup is garbled.
l Incorrect formation of plural - beware the exiguous apostrophe!
m Contains a request or escape that is outside the portable subset that can be rendered by non-groff viewers such as the KDE and GNOME help browsers.
n Invalid Sx reference - not a section on this page.
o TBL markup not used where it should be. Tables stitched together with .ta or list requests can't be lifted to DocBook and will often choke third-party viewers such as TKMan, XMan, Rosetta, etc.
p Synopsis was incomplete and somewhat garbled.
q Unused macro causes parsing problems.
r I supplied a missing mail address. Without it, the .TP at the end of the authors list was ill-formed.
s Changed page to use the .URL macro now preferred on man(7).
t Synopsis has to be immediately after NAME section for DocBook translation to work.
u Use local definitions of .EX/.EE or .DS/.DE to avoid low-level troff requests in the page body.
  There are plans to add these to groff man; in the interim, this patch adds a compatible definition to your page.
v Missing DESCRIPTION section.
w .SS markup in name section seriously confuses parsing, and sections don't follow standard naming conventions.
x Syntax had to be rearranged because of an options callout. This is still excessively complicated; third-party man-page viewers are likely to choke on it.
y I realize this man page is generated from POD, HTML, or some other non-man markup. Please fix the upstream markup so that it generates a well-formed manual page with the indicated corrections.
z Garbled or missing text near .SS tags. It's not clear to me what's going on here, but .SS tags on adjacent lines defeat any attempt to parse the markup. I have inserted text lines indicating that something needs to be written here.
1 Garbled comment leader is likely to confuse third-party readers.
2 Use of man or mandoc lists to simulate literal displays defeats any attempt at structural translation.
3 Use of .RS/RE or list markup to produce indentation in examples and screenshots makes structural translation impossible.
4 \c is an obscure feature; third-party viewers sometimes don't interpret it. Plain \ is safer.
5 Two-digit year in .Dd macro.
6 Presentation-level use of SS could not be structurally translated. I changed lower-level instances to .TP.
7 This page wins an award for exceptionally creative and perverse abuse of list syntax.
8 C function syntax has extra paren.
9 I replaced '-->' with a troff right arrow, which doclifter will translate properly to an XML/HTML arrow glyph.
0 Function declarations had to be modified in order to fit into the DocBook DTD. This is not an error in troff usage, but it reduces the quality of the HTML that can be generated from this page through the DocBook toolchain.
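Each data line in the table below is a pipe-separated record; the fields appear to be a status/flag prefix, the affected page name(s), the problem-code letters from the legend above, and the maintainer or bug-tracker address the report goes to. As a minimal sketch of how a row could be split (parse_patches_row is a hypothetical helper, not part of manlifter or doclifter):

def parse_patches_row(line):
    # Hypothetical: split one pipe-separated PATCHES row into its four
    # fields -- flag prefix, page list, problem codes, contact address.
    flags, pages, codes, address = [field.strip() for field in line.split("|", 3)]
    return flags, [p for p in pages.split(",") if p], list(codes), address

For example, parse_patches_row("y|ac.1 | |sgk@sgk.tiac.net") yields ("y", ["ac.1"], [], "sgk@sgk.tiac.net").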
%% y|ac.1 | |sgk@sgk.tiac.net y|acl.5 |I |https://savannah.nongnu.org/bugs/index.php?39096 y|aconnect.1 | |tiwai@suse.de,alsa-devel@lists.sourceforge.net nA|admin.1posix |C |Francesco Paolo Lovergine nA|afm2pl.1 |I |tex-live@tug.org nA|aio.7 |I |mtk-manpages@gmx.net nA|american.5,english.5 |I9 |geoff@cs.hmc.edu y|amidi.1,amixer.1,aplay.1,arecord.1 | |Clemens Ladisch nA|amf.conf.5 |Y2 |Ubuntu Developers y|amrecover.8 | | nA|analog.1 |CZ |analog-author@lists.meer.net y|animate.1,compare.1,conjure.1,composite.1,convert.1,display.1,identify.1,import.1,mogrify.1,montage.1 | |magick-bugs@imagemagick.org y|apport-retrace.1 | |martin.pitt@ubuntu.com y|appres.1x | |xorg@lists.freedesktop.org 6nA|arp.7 |p |Bernd Eckenfels nA|as.1 |Zy |bug-binutils@gnu.org n|asn1_der_coding.3 |Ly |help-libtasn1@gnu.org n|asn1_write_value.3 |Jy |help-libtasn1@gnu.org y|aspell.1 | |pyro@debian.org y|atmsigd.conf.4 | |Werner.Almesberger@epfl.ch y|auditd.8 | |linux-audit@redhat.com nA|audit.rules.7 |a |linux-audit@redhat.com nA|auth_destroy.3 |I |mtk-manpages@gmx.net nA|authnone_create.3 |I |mtk-manpages@gmx.net nA|authunix_create.3 |I |mtk-manpages@gmx.net nA|authunix_create_default.3 |I |mtk-manpages@gmx.net y|awk.1 | |bug-gawk@gnu.org y|pgawk.1,gawk.1 | |bug-gawk@gnu.org n|barchart.3blt,stripchart.3blt |JG |gah@siliconmetrics.com n|graph.3blt |G |gah@siliconmetrics.com y|bash.1 | |bug-bash@gnu.org nA|bc.1 |J |bug-bc@gnu.org y|bgpd.8 | |bug-zebra@gnu.org 1p|bitmap.1 |oJ |xorg@lists.freedesktop.org p|BitmapBitOrder.3,BitmapPad.3,BitmapUnit.3,DisplayHeight.3,DisplayHeightMM.3,DisplayWidth.3,DisplayWidthMM.3,ImageByteOrder.3,XAddHosts.3|I |xorg@lists.freedesktop.org y|bounce.5,aliases.5,relocated.5,virtual.8| |wietse@porcupine.org 1nA|header_checks.5 |m |wietse@porcupine.org nA|bootparam.7 |Iu7 |mtk-manpages@gmx.net n|bridge.8 |C |netdev@vger.kernel.org nA|brltty.1 |J |BRLTTY@mielke.cc nA|btcflash.8 |J |Daniel Baumann y|bzadmin.6 | |bzflag-dev@lists.sourceforge.net y|bzfquery.6 | |bzflag-dev@lists.sourceforge.net n|bzfs.6 |o |bzflag-dev@lists.sourceforge.net n|bzr.1 |Js |bazaar@lists.canonical.com y|cadaver.1 | |Joe Orton nA|capabilities.7 |L |mtk-manpages@gmx.net nA|callrpc.3 |I |mtk-manpages@gmx.net y|cancel-cups.1,cancel.1,lp.1,lp-cups.1 | | y|cannastat.1 | |Canna@nec.co.jp y|cbrt.3,cbrtf.3,cbrtl.3| |mtk-manpages@gmx.net nA|cdparanoia.1 |L |monty@xiph.org y|cdrdao.1 | |cdrdao-devel@lists.sourceforge.net bnA|chat.8 |J |paulus@samba.org y|chcat.8 | |dwalsh@redhat.com 6nA|chmoddic.1 |C |Canna@nec.co.jp nA|chroot.2 |EL |bug-coreutils@gnu.org nA|clnt_broadcast.3 |I |mtk-manpages@gmx.net nA|clnt_call.3 |I |mtk-manpages@gmx.net nA|clnt_control.3 |I |mtk-manpages@gmx.net nA|clnt_create.3 |I |mtk-manpages@gmx.net nA|clnt_destroy.3 |I |mtk-manpages@gmx.net nA|clnt_freeres.3 |I |mtk-manpages@gmx.net nA|clnt_geterr.3 |I |mtk-manpages@gmx.net nA|clnt_pcreateerror.3 |I |mtk-manpages@gmx.net nA|clnt_perrno.3 |I |mtk-manpages@gmx.net nA|clnt_perror.3 |I |mtk-manpages@gmx.net nA|clnt_spcreateerror.3 |I |mtk-manpages@gmx.net nA|clnt_sperrno.3 |I |mtk-manpages@gmx.net nA|clnt_sperror.3 |I |mtk-manpages@gmx.net nA|clntraw_create.3 |I |mtk-manpages@gmx.net nA|clnttcp_create.3 |I |mtk-manpages@gmx.net nA|clntudp_bufcreate.3 |I |mtk-manpages@gmx.net nA|clntudp_create.3 |I |mtk-manpages@gmx.net 1nA|co.1 |o |rcs-bugs@gnu.org nA|codepage.1 |C |mckinstry@computer.org nA|compose.1,edit.1 |u |Brian White y|compress.1,uncompress.1 | |peter@ncs.nl nA|console_codes.4 |I |mtk-manpages@gmx.net nA|console_ioctl.4 |Iol |mtk-manpages@gmx.net nA|core.5 |I 
|mtk-manpages@gmx.net nA|corosync.conf.5 |LIY |Ubuntu Developers y|cpufreq-info.1,cpufreq-set.1 |I |linux@brodo.de,malattia@gmail.com nA|cpuset.7 |R |mtk-manpages@gmx.net y|crash.8 | |fenlason@redhat.com y|CrtImgType.3,Tk_InitImageArgs.3| |tcl-core@lists.sourceforge.net y|cshost.1 | |Canna@nec.co.jp y|cscope.1 | |broeker@users.sourceforge.net y|ctangle.1,cweave.1,cweb.1| | n|ctanify.1 |y |tex-live@tug.org y|curl.1 | | y|curl_formadd.3 |J |https://sourceforge.net/p/curl/bugs/1233 y|libcurl_tutorial.3 |J |https://sourceforge.net/p/curl/bugs/1234 1nA|cvs.1 |L |cvs-dev@nongnu.org bnA|cxpm.1 |W |lehors@sophia.inria.fr nA|dash.1,sh.1 |J |herbert@gondor.apana.org.au y|dasher.1 | | nA|DBD::Gofer.3pm |Jy | y|dbz.3 | |inn-bugs@isc.org nA|dcut.1 |R |Thomas Viehmann y|ddd.1 | |ddd@gnu.org nA|Parse::DebControl::Error.3pm|Wy |Jay Bonci nA|devnag.1 |J |Zdenek Wagner , tex-live@tug.org nA|dh_install.1 |iy |joeyh@debian.org nA|dh_movefiles.1 |Uy |joeyh@debian.org y|dhclient.8 | |dhcp-client@isc.org n|dhcp-eval.5 |J |Ubuntu Developers y|dicar.1 | |Canna@nec.co.jp y|dictfmt.1 | |faith@cs.unc.edu y|dictl.1 | |hilliard@debian.org, vle@gmx.net y|diffstat.1 | | nA|directomatic.1 |oG |till.kamppeter@gmail.com y|dislocate.1 | |Don Libes nA|dkms.8 |XJ |dkms-devel@dell.com y|dmraid.8 | |Heinz Mauelshagen nA|dpkg.1,dpkg-source.1 |L |debian-dpkg@lists.debian.org nA|dosbox.1 |L |dosbox-crew@gmail.com y|doxytag.1 | |doxygen-users@lists.sourceforge.net y|dpromdic.1 | | n|dragdrop.3blt |f |gah@siliconmetrics.com nA|dump-acct.8 |U |Daniel Baumann , Mathieu Trudel nA|duplicity.1 |t |Kenneth Loafman nA|dv2dt.1 |C |tex-live@tug.org y|dvgrab.1 | |nn4lyahoode@thyrsus.com 1nA|dvipdf.1,font2c.1 |R |epm@easysw.com nA|dvitodvi.1 |R |tex-live@tug.org y|dvipdfm.1 | |mwicks@kettering.edu 1n|editres.1 |I |xorg@lists.freedesktop.org 1nA|e2fsck.8 |o |tytso@thunk.org n|e2image.8 |J |tytso@thunk.org 1nA|efax.1 |Jug |edc@cce.com y|egrep.1,fgrep.1,grep.1| |bug-grep@gnu.org y|enscript.1 | |mtr@iki.fi y|elinkskeys.5 | |elinks-dev@linuxfromscratch.org y|emacs.1 | |bug-gnu-emacs@prep.ai.mit.edu y|epoll_ctl.2 | |aeb@cwi.nl, davidel@xmailserver.org y|epoll.4 | |aeb@cwi.nl, davidel@xmailserver.org y|eqn.1,geqn.1 | |bug-groff@gnu.org y|error.3 | |mtk-manpages@gmx.net nA|expire.ctl.5 |oY |Ubuntu Developers 1nA|openais_overview.8 |W |scd@broked.com y|cpg_overview.8,evs_overview | | nA|exiv2.1 |L |Andreas Huggel , KELEMEN Peter y|expect.1 | |Don Libes 1nA|extractres.1 |R |angus@harlequin.co.uk nA|f2py.1,f2py2.7.1 |C |f2py-users@cens.ioc.ee nA|faked-sysv.1,faked-tcp.1,faked.1,fakeroot-sysv.1,fakeroot-tcp.1,fakeroot.1|r |schizo@debian.org y|fbset.8 | |Geert.Uytterhoeven@cs.kuleuven.ac.be, zippel@fh-brandenburg.de nA|fence_drac.8 |J |cluster-devel@redhat.com nA|fence_na.8 |Wy |cluster-devel@redhat.com nA|fence_drac5.8 |J |cluster-devel@redhat.com y|fig2dev.1x | |bvsmith@lbl.gov 1nA|fig2ps2tex.1 |R |bvsmith@lbl.gov y|findchip.8,irdadump.8,irdaping.8,irpsion5.8,irattach.8| |wehe@tuxmobil.org nA|findhyph.1 |C |tex-live@tug.org y|firefox.1 | | y|flock.1 | |adam@yggdrasil.com nA|foo2hbpl2.1,foo2hbpl2-wrapper.1|1 |Rick Richardson y|foomatic-ppdfile.1 | |till.kamppeter@gmail.com 1nA|foomatic-rip.1,lpdomatic.8|oG |till.kamppeter@gmail.com nA|formail.1,lockfile.1,procmail.1,procmailex.5,procmailrc.5,procmailsc.5|K |srb@cuci.nl, guenther@sendmail.com y|forsort.1 | |Canna@nec.co.jp y|free.1 | |albert@users.sf.net nA|fsck.8,fsck.ext2.8,fsck.ext3.8,fsck.ext4.8,fsck.ext4dev.8|o |util-linux@vger.kernel.org nA|fsck.msdos.8,fsck.vfat.8,dosfsck.8|C |Daniel Baumann 
y|fsinfo.8 | |ezk@cs.columbia.edu nA|ftm.7 |D |mtk-manpages@gmx.net nA|fuser.1 |J |Werner Almesberger , Craig Small nA|fuzzyflakes.6x |C |Barry Dmytro nA|gacutil.1,cli-gacutil.1,gacutil2.1|N |mono-docs-list@lists.ximian.com y|gaim.1 | |Rob Flynn nA|gdb.1 |cJ |gdb-patches@sourceware.org nA|genisoimage.1 |o |debburn-devel@lists.alioth.debian.org 1nA|getafm.1 |R |rj@rainbow.in-berlin.de y|getcon.3,getexeccon.3 | |russell@coker.com.au y|getent.1 | |util-linux@vger.kernel.org nA|getpass.3 |L |mtk-manpages@gmx.net y|GetUid.3 | |tcl-core@lists.sourceforge.net nA|get_myaddress.3 |I |mtk-manpages@gmx.net y|getcontext.2 | |mtk-manpages@gmx.net y|getrpcport.3 | | nA|getty.8 |I |util-linux@vger.kernel.org y|gfdl.7 | | bnA|gftodvi.1 |I | y|ghostscript.1 | |giles@snow.thaumas.net y|gij.1 | | nA|gipddecode.1,hbpldecode.1|1 |Rick Richardson nA|gmcs.1 |L |mono-docs-list@lists.ximian.com y|gnome-session.1 | |Miguel de Icaza y|gnome-control-center.1| |Ubuntu Desktop Team nA|gnumeric.1 |L |gnumeric-list@gnome.org, Jan Schaumann , Adrian Custer y|gnuplot.1 | |gnuplot-info@lists.sourceforge.net y|gob2.1 | |George Lebl y|gpic.1,pic.1 | |bug-groff@gnu.org nA|gpm-types.7 |JC |gpm@lists.linux.it nA|grap.1 |Q |faber@lunabase.org y|grn.1 |C |bug-groff@gnu.org y|groff.1 | |bug-groff@gnu.org y|groff_diff.7 | |bug-groff@gnu.org y|groff_char.7 | |bug-groff@gnu.org y|groff_mdoc.7 | |bug-groff@gnu.org y|groff_me.7 |Io |bug-groff@gnu.org n|groff_mom.7 |s |bug-groff@gnu.org y|groffer.1 | |bug-groff@gnu.org y|grolj4.1,grops.1 | |bug-groff@gnu.org p|grodvi.1 |C7 |bug-groff@gnu.org nA|gs.1,ghostscript.1 |CY |Ubuntu Developers n|gthumb.1 |L |paolo.bacch@tin.it nA|gvcolor.1 |C |Stephen C. North , Emden R. Gansner nA|gvpr.1 |WI |Emden R. Gansner y|hformat.1,hmount.1 | |Robert Leslie 6nA|hfsutils.1 |HJ |Robert Leslie nA|hgrc.5 |H |mercurial-devel@selenic.com y|hidd.1 | |bluez-devel@lists.sourceforge.net y|hostname.1 | |Bernd Eckenfels nA|hosts_access.5,hosts.allow.5,hosts.deny.5,hosts_options.5|IY |Ubuntu Developers bA|hp-plugin.1 |v | nA|html2text.1 |C |Martin Bayer , Eugene V. 
Lyubimkin nA|html2textrc.5 |X |Martin Bayer y|htfuzzy.1 | |htdig-dev@htdig.org y|hwclock.8 | |bunk@stusta.de nA|hypertorus.6x |C |Carsten Steger y|ibod.1 | |Bjoern Smith y|ibod_cf.4 | |Bjoern Smith y|icc2ps.1,jpegicc.1 | |shiju.p@gmail.com 1nA|icclink.1 |E |shiju.p@gmail.com nA|icctrans.1 |L |shiju.p@gmail.com 1nA|tifficc.1 |E |shiju.p@gmail.com nA|icmp.7 |o |mtk-manpages@gmx.net nA|idmapd.conf.5 |XW |linux-nfs@vger.kernel.org y|idna_strerror.3,idna_to_ascii_4i.3,idna_to_ascii_4z.3,idna_to_ascii_8z.3,idna_to_ascii_lz.3,idna_to_unicode_44i.3,idna_to_unicode_4z4z.3,idna_to_unicode_8z4z.3,idna_to_unicode_8z8z.3,idna_to_unicode_8zlz.3,idna_to_unicode_lzlz.3,pr29_4.3,pr29_4z.3,pr29_8z.3,pr29_strerror.3,punycode_decode.3,punycode_strerror.3,stringprep.3,stringprep_4i.3,stringprep_4zi.3,stringprep_check_version.3,stringprep_convert.3,stringprep_locale_charset.3,stringprep_locale_to_utf8.3,stringprep_profile.3,stringprep_strerror.3,stringprep_ucs4_nfkc_normalize.3,stringprep_ucs4_to_utf8.3,stringprep_unichar_to_utf8.3,stringprep_utf8_nfkc_normalize.3,stringprep_utf8_to_locale.3,stringprep_utf8_to_ucs4.3,stringprep_utf8_to_unichar.3,tld_check_4.3,tld_check_4t.3,tld_check_4tz.3,tld_check_4z.3,tld_check_8z.3,tld_check_lz.3,tld_default_table.3,tld_get_4.3,tld_get_4z.3,tld_get_table.3,tld_get_z.3,tld_strerror.3 | |bug-libidn@gnu.org y|punycode_encode.3 | |bug-libidn@gnu.org nA|icedax.1 |AI |Heiko Eissfeldt , debburn-devel@lists.alioth.debian.org nA|ilbmtoppm.1 |L |bryanh@giraffe-data.com 1nA|includeres.1 |R |giles@artifex.com y|ImageMagick.1 | |magick-bugs-owner@imagemagick.org 1n|imake.1 |I |xorg@lists.freedesktop.org nA|inet.3 |IM |mtk-manpages@gmx.net y|inews.1 | |inn-bugs@isc.org y|init.5 |I |https://bugs.launchpad.net/upstart/+bug/1185108 nA|innfeed.8 |B |inn-bugs@isc.org nA|inotify.7 |I |mtk-manpages@gmx.net y|install.1 | |bug-coreutils@gnu.org y|intel.4 | |xorg@lists.freedesktop.org n|intel_panel_fitter.1 |E |intel-gfx@lists.freedesktop.org nA|IO::WrapTie.3pm |WC |David F. 
Skoll n|ip-netns.8,ip-maddress.8,ip-tunnel.8,ip-route.8|6 |netdev@vger.kernel.org n|ip-neighbour.8 |6QI |netdev@vger.kernel.org n|ip-rule.8 |6I |netdev@vger.kernel.org nA|ipcrm.1 |C |util-linux@vger.kernel.org 1nA|ipppd.8 |L |keil@isdn4linux.de y|ip6tables.8 | |netfilter-devel@lists.netfilter.org 1nA|iptables.8 |CJL |netfilter-devel@lists.netfilter.org nA|ip6tables-save.8 |U |netfilter-devel@lists.netfilter.org nA|ipptoolfile.5 |J |cups-dev@easysw.com y|iptraf.8 | |riker@seul.org 1nA|ipv6calc.8 |Lo |pb@bieringer.de y|irb.1 | |ruby-doc@ruby-lang.org nA|irda.7 |0 |Jean Tourrilhes y|irnet.4 | |jt@hpl.hp.com y|irsend.1 | |lirc@bartelmus.de y|isadump.8,isaset.8 | |phil@philedelbrock.com nA|ispell.1,buildhash.1,munchlist.1,findaffix.1,tryaffix.1,icombine.1,ijoin.1|C |ispell-bugs@itcorp.com nA|ispell-wrapper.1 |CY |Ubuntu Developers nA|kioclient.1 |k |Kubuntu Developers y|lamboot.1 | |lam-devel@lam-mpi.org n|lamd.1 | |lam-devel@lam-mpi.org nA|lam.7,LAM.7 |L |lam-devel@lam-mpi.org nA|lam-helpfile.5 |I |lam-devel@lam-mpi.org b|lastcomm.1 |I |https://savannah.gnu.org/bugs/index.php?39134 y|lastlog.8 | | y|latex.1 | |te@dbs.uni-hannover.de nA|latin2.7,iso_8859-2.7,iso_8859_2.7,iso-8859-2.7|* |mtk-manpages@gmx.net y|LDP.7 | | nA|ld-linux.8,ld-linux.so.8|L |mtk-manpages@gmx.net 1nA|ld.so.8 |L |mtk-manpages@gmx.net 6nA|less.1,pager.1 |J |bug-less@gnu.org nA|lftp.1 |I |lav@yars.free.net, nA|libcaca-authors.3caca|W |Sam Hocevar nA|libcaca-canvas.3caca |WJ |Sam Hocevar nA|libcaca-env.3caca |WL |Sam Hocevar nA|libcaca-font.3caca |WJ |Sam Hocevar nA|libcaca-ruby.3caca |W |Sam Hocevar nA|libcaca-tutorial.3caca|W |Sam Hocevar 4nA|libpng.3 |SJ |png-mng-implement@lists.sourceforge.net y|libpngpf.3 | |png-mng-implement@lists.sourceforge.net b|libreoffice.1,loffice.1,lofromtemplate.1|J |https://bugs.freedesktop.org/show_bug.cgi?id=65243 nA|libtiff.3tiff |I |tiff@lists.maptools.org y|licensecheck.1 | |Ubuntu Developers nA|list_audio_tracks.1 |W |Heiko Eissfeldt , debburn-devel@lists.alioth.debian.org 1nA|ln.1 |j |bug-coreutils@gnu.org y|locate.1 | |mitr@redhat.com n|locate.findutils.1 |U |bug-findutils@gnu.org nA|logger.1 |O |util-linux@vger.kernel.org y|logrotate.8 | | nA|logsys_overview.8 |JY |Ubuntu Developers y|indxbib.1 | |bug-groff@gnu.org n|lkbib.1 |C |bug-groff@gnu.org y|lookbib.1 | |bug-groff@gnu.org 6nA|lpr.1 |U |papowell@lprng.com y|lpstat.1,lpstat-cups.1| |papowell@lprng.com y|lsof.8 | |abe@purdue.edu nA|lynx.1,www-browser.1 |C |lynx-dev@nongnu.org y|mag.1 | |te@dbs.uni-hannover.de nA|makeindex.1 |J |tex-live@tug.org y|man.1,manpath.1 | |mtk-manpages@gmx.net nA|mawk.1 |R |http://code.google.com/p/original-mawk/issues/detail?id=21&thanks=21&ts=1369758804 y|mcs.8 | | y|mdel.1 | |mtools@mtools.linux.lu nA|mdoc.7 |J |mtk-manpages@gmx.net y|merge.1 | |bug-rcs@gnu.org y|mev.1 | |gpm@lists.linux.it y|mf.1,inimf.1,virmf.1 | |te@dbs.uni-hannover.de nA|mkdosfs.8,mkfs.msdos.8,mkfs.vfat.8|C |Daniel Baumann y|mkdtemp.3 | |mtk-manpages@gmx.net nA|mkjobtexmf.1 |Ly |tex-live@tug.org y|mkzftree.1 | |hpa@zytor.com nA|mlocate.db.5 |J |Miloslav Trmac nA|mono.1,cli.1 |JX |mono-docs-list@lists.ximian.com nA|mono-config.5 |X |mono-docs-list@lists.ximian.com nA|more.1 |O |util-linux@vger.kernel.org y|motd.news.5 | |inn-bugs@isc.org y|mount.fuse.8 | |fuse-devel@lists.sourceforge.net y|mozplugger.7 | |louis@bavoil.net y|mpcd.8 | |Heikki Vatiainen , Sampo Saaristo y|mpiexec.1 | |lam-devel@lam-mpi.org y|mpiexec.lam.1 | |lam-devel@lam-mpi.org y|mpimsg.1,mpitask.1 | |lam-devel@lam-mpi.org n|mpirun.1,mpirun.lam.1 |L 
|lam-devel@lam-mpi.org y|mpost.1 | | nA|mq_overview.7 |I |mtk-manpages@gmx.net n|mtools.5,mtools.conf.5|X |mtools@mtools.linux.lu nA|mtr.8 |J |mtr@lists.xmission.com y|mtx.1 | |eric@badtux.org nA|mutt.1 |JQ |mutt-dev@mutt.org nA|muttrc.5 |JXu |mutt-dev@mutt.org y|mysqld.1,mysqld_multi.1,mysqldump.1,mysql_zap.1,mysqladmin.1,mysqlshow.1| |monty@tcx.se y|named.conf.5 | |bind9-bugs@isc.org y|nasm.1,ndisasm.1 | |nasm-devel@lists.sourceforge.net n|nautilus.1 |L |Ubuntu Desktop Team nA|nautilus-connect-server.1|L|nautilus-list@gnome.org y|nbp_name.3 | |netatalk-devel@lists.sourceforge.net nA|netpbm.1 |J |bryanh@giraffe-data.com nA|netstat.8 |Cz |ecki@linux.de y|newgrp.1 | |Julianne Frances Haugh y|nfsd.7 | |neilb@cse.unsw.edu.au nA|nfsmount.conf.5 |CY |ubuntu-devel-discuss@lists.ubuntu.com y|NetworkManager.1,nm-tool.1 | |networkmanager-list@gnome.org n|nmcli.1,nm-connection-editor.1|WX |networkmanager-list@gnome.org nA|nsgmls.1 |CI |James Clark y|nslookup.1 | | nA|ntfs-3g.secaudit.8 |CY |Ubuntu Developers nA|ntfs-3g.usermap.8 |C |ntfs-3g-devel@lists.sf.net y|ntpdate.1 | |mills@udel.edu y|ntpq.1 | |mills@udel.edu nA|nvidia-settings.1 |IxY |ubuntu-devel-discuss@lists.ubuntu.com nA|nvidia-smi.1 |I6Y |ubuntu-devel-discuss@lists.ubuntu.com y|octave-config.1 | |jwe@bevo.che.wisc.edu, edd@debian.org nA|ode.1 |e |bug-gnu-utils@gnu.org nA|oldfind.1,find.1 |J |findutils-patches@gnu.org nA|omfonts.1 |W |tex-live@tug.org y|on_ac_power.1 | |Richard Hughes 1nA|openvt.1,open.1 |L |aeb@cwi.nl y|operator.7 | |mtk-manpages@gmx.net nA|orbd.1 |WyY |ubuntu-devel-discuss@lists.ubuntu.com nA|orca.1 |s |orca-list@gnome.org nA|osage.1,mm2gv.1 |J |Emden R. Gansner y|parted.8 | |bug-parted@gnu.org nA|patch.1 |It |bug-patch@gnu.org nA|pax.1posix |WJL |Francesco Paolo Lovergine nA|pbmclean.1,pnmcomp.1,pnmnorm.1,pnmpad.1,pnmquant.1,pnmremap.1,pnmtotiff.1,pgmnorm.1,ppmcolors.1,ppmnorm.1,ppmntsc.1,ppmquant.1,ppmrainbow.1,ppmtogif.1,ppmtoxpm.1,tifftopnm.1|C |bryanh@giraffe-data.com nA|pbget.1,pbput.1,pbputs.1 |W |Dustin Kirkland nA|pbmtextps.1 |C |Bryan Henderson nA|pcap-filter.7 |I |tcpdump-workers@lists.tcpdump.org y|pcre.3,pcrebuild.3 | |ph10@cam.ac.uk nA|pcreapi.3 |I |http://bugs.exim.org/show_bug.cgi?id=1359 nA|pcreposix.3 |H |http://bugs.exim.org/show_bug.cgi?id=1360 y|pcreprecompile.3 | |ph10@cam.ac.uk y|pcrepattern.3,pcrecallout.3,pcrepartial.3| |ph10@cam.ac.uk y|pdfseparate.1 ` | |poppler@lists.freedesktop.org y|pgmabel.1,pgmtopgm.1,pnmstitch.1,pgmmorphconv.1,pnmtoddif.1,ppmtopj.1 | |Bryan Henderson y|php.1 | |phpdoc@lists.php.net nA|pidgin.1 |T |Sean Egan , Ben Tegarden , John Bailey y|pipe.8 | |wietse@porcupine.org nA|pkg-config.1 |q |pkg-config@lists.freedesktop.org y|play.1 | | nA|plot.1,plotfont.1 |W |bug-gnu-utils@gnu.org nA|pmap_getmaps.3 |I |mtk-manpages@gmx.net nA|pmap_getport.3 |I |mtk-manpages@gmx.net nA|pmap_rmtcall.3 |I |mtk-manpages@gmx.net nA|pmap_set.3 |I |mtk-manpages@gmx.net nA|pmap_unset.3 |I |mtk-manpages@gmx.net nA|pnmhisteq.1,ppmcie.1,ppmlabel.1,sbigtopgm.1|R |Bryan Henderson nA|pnmpaste.1 |X |Bryan Henderson nA|pnmtotiffcmyk.1 |C |Bryan Henderson nA|pnmtofiasco.1 |e |Bryan Henderson nA|policytool.1 |Wy |openjdk@lists.launchpad.net y|servertool.1 | |openjdk@lists.launchpad.net y|postconf.5 | |wietse@porcupine.org y|postmap.1,postsuper.1 | |wietse@porcupine.org nA|proc.5 |Io |mtk-manpages@gmx.net y|ps.1 | |acahalan@cs.uml.edu n|pstree.1,pstree.x11.1 |C |Craig Small bA|pstops.1 |R | y|proxymap.8 | |wietse@porcupine.org 6nA|ps2epsi.1 |j |giles@artifex.com y|ps2pdf.1,ps2pdf12.1,ps2pdf13.1| 
|giles@artifex.com nA|ps2pdfwr.1 |R |giles@artifex.com 1nA|psnup.1 |J |giles@artifex.com 1nA|pthreads.7 |I |mtk-manpages@gmx.net 6nA|ptx.1 |j |bug-gnu-utils@gnu.org nA|pytest.1 |C |doc-sig@python.org y|quotactl.2 | |jkar8572@sers.sourceforge.net 1nA|qos.7 |L |linux-atm-general@lists.sourceforge.net nA|qsub.1posix |I |Francesco Paolo Lovergine y|racoon.conf.5 | |bugs@lists.freeswan.org n|radeon.4 |L |xorg@lists.freedesktop.org y|random.4 | |mtk-manpages@gmx.net n|rcsfile.5 |d |rcs-bugs@gnu.org y|ram.4 | |mtk-manpages@gmx.net y|raw2tiff.1,tiffcmp.1 | |tiff@lists.maptools.org bA|rc-alert.1 |u |Julian Gilbey , Adam D. Barratt y|rcsintro.1 |u |bug-rcs@gnu.org y|refer.1 | |bug-groff@gnu.org nA|registerrpc.3 |I |mtk-manpages@gmx.net nA|regulatory.bin.5 |w |linux-wireless@vger.kernel.org bA|renice.1 |O |http://userweb.kernel.org/~kzak/util-linux/ y|replace.1,isamchk.1,isamlog.1| |monty@tcx.se y|resize2fs.8 | |tytso@thunk.org y|rexec.3 | | bA|rev.1 |OL |http://userweb.kernel.org/~kzak/util-linux/ y|rdump.8,dump.8,restore.8,rrestore.8 | |Stelian Pop nA|rhythmbox-client.1 |L |Sven Arvidsson , gnome-doc-list@gnome.org nA|rlog.1 |L |rcs-bugs@gnu.org nA|rlogin.1 |nY |Ubuntu Developers nA|rlwrap.1,readline-editor.1|J |Chet Ramey nA|rmid.1 |Wy |openjdk@lists.launchpad.net nA|rmiregistry.1 |Wy |openjdk@lists.launchpad.net y|roff.7 | |bug-groff@gnu.org nA|rotatelogs.8 |* |docs@httpd.apache.org nA|rpc.3 |I |mtk-manpages@gmx.net nA|rpc.5 |c |mtk-manpages@gmx.net nA|rsh.1,ssh.1,authorized_keys.5,sshd.8|nY |Ubuntu Developers p|rstartd.1 |I |xorg@lists.freedesktop.org nA|rsyslog.conf.5 |J |rsyslog@lists.adiscon.com n|ruby.1,ruby1.9.1.1 |L |ruby-doc@ruby-lang.org p|s3.4 |I |xorg@lists.freedesktop.org nA|sane-apple.5 |L |https://alioth.debian.org/tracker/index.php?func=detail&aid=314280&group_id=30186&atid=410366 nA|sane-lexmark.5 |Lo |https://alioth.debian.org/tracker/index.php?func=detail&aid=314281&group_id=30186&atid=410366 nA|sane-mustek_pp.5 |Lo |https://alioth.debian.org/tracker/index.php?func=detail&aid=314282&group_id=30186&atid=410366 y|sane-pixma.5 |W |https://alioth.debian.org/tracker/index.php?func=detail&aid=314283&group_id=30186&atid=410366 y|scons.1 | |scons-dev@scons.org nA|scons-time.1 |LZ |scons-dev@scons.org nA|script.1 |O |util-linux@vger.kernel.org 1nA|SDL_Init.3 |L |sdl@lists.libsdl.org nA|SDL_CDPlayTracks.3 |8 |docs@lists.libsdl.org y|security.3 | |xorg@lists.freedesktop.org nA|see.1,run-mailcap.1,print.1 |C |Brian White y|send-uucp.8 | |inn-bugs@isc.org nA|setcap.8 |C |Andrew G. 
Morgan y|setpci.8 | |Martin Mares y|sg_senddiag.8,sg_wr_mode.8 | |dgilbert@interlog.com nA|sg_sat_phy_event.8 |C |dgilbert@interlog.com nA|sgmlspl.1 |L |Ardo van Rangelrooij nA|signal.7 |I |mtk-manpages@gmx.net y|sk98lin.4 | |linux@syskonnect.de y|slapd.8 | |OpenLDAP-devel@OpenLDAP.org y|slapdn.8,slapacl.8,slapadd.8 |u |OpenLDAP-devel@OpenLDAP.org nA|slapd.conf.5 |LI |OpenLDAP-devel@OpenLDAP.org nA|slapd-config.5 |LI |OpenLDAP-devel@OpenLDAP.org nA|slapo-constraint.5 |L |OpenLDAP-devel@OpenLDAP.org nA|slogin.1 |n |openssh-unix-dev@mindrot.org y|snmpvacm.1 | |net-snmp-coders@lists.sourceforge.net y|snmpd.conf.5 | |net-snmp-coders@lists.sourceforge.net y|snmp.conf.5 | |net-snmp-coders@lists.sourceforge.net nA|snmpd.examples.5snmp |J |net-snmp-coders@lists.sourceforge.net y|socket-event.7 | |https://bugs.launchpad.net/upstart/+bug/1018925 bA|software-properties-gtk.1 |W | nA|spam.1 |C | nA|spufs.7 |I |mtk-manpages@gmx.net y|squid_ldap_auth.8,squid_ldap_group.8 | |squid-bugs@squid-cache.org 3pA|sshd_config.5,ssh_config.5 |n |brad@openbsd.org nA|ssh-keygen.1 |Rn |Colin Watson y|states.1 | |mtr@iki.fi n|sudoers.5 |n |Todd C. Miller nA|svc_destroy.3 |I |mtk-manpages@gmx.net nA|svc_freeargs.3 |I |mtk-manpages@gmx.net nA|svc_getargs.3 |I |mtk-manpages@gmx.net nA|svc_getcaller.3 |I |mtk-manpages@gmx.net nA|svc_getreq.3 |I |mtk-manpages@gmx.net nA|svc_getreqset.3 |I |mtk-manpages@gmx.net nA|svc_register.3 |I |mtk-manpages@gmx.net nA|svc_run.3 |I |mtk-manpages@gmx.net nA|svc_sendreply.3 |I |mtk-manpages@gmx.net nA|svc_unregister.3 |I |mtk-manpages@gmx.net nA|svcerr_auth.3 |I |mtk-manpages@gmx.net nA|svcerr_decode.3 |I |mtk-manpages@gmx.net nA|svcerr_noprog.3 |I |mtk-manpages@gmx.net nA|svcerr_noproc.3 |I |mtk-manpages@gmx.net nA|svcerr_progvers.3 |I |mtk-manpages@gmx.net nA|svcerr_systemerr.3 |I |mtk-manpages@gmx.net nA|svcerr_weakauth.3 |I |mtk-manpages@gmx.net nA|svcfd_create.3 |I |mtk-manpages@gmx.net nA|svcraw_create.3 |I |mtk-manpages@gmx.net nA|svctcp_create.3 |I |mtk-manpages@gmx.net nA|svcudp_bufcreate.3 |I |mtk-manpages@gmx.net nA|svcudp_create.3 |I |mtk-manpages@gmx.net y|synclient.1 | |mtk-manpages@gmx.net nA|synctex.1 |5 |tex-live@tug.org 6nA|rb.1,rx.1,rz.1,sb.1,sx.1,sz.1|e |Uwe Ohse nA|tar.1 |CV |bug-tar@gnu.org nA|tc-prio.8,tc-htb.8,tc-cbq.8,tc-cbq-details.8 |C |net@vger.kernel.org nA|tc-stab.8 |IJ |net@vger.kernel.org 1nA|tcpd.8 |I |wietse@porcupine.org nA|tcpdmatch.8 |I |wietse@porcupine.org nA|tek2plot.1 |W |bug-gnu-utils@gnu.org nA|telnet.1,telnet.netkit.1|XY |Ubuntu Developers nA|test.1,[.1 | |bug-coreutils@gnu.org y|texdoctk.1 | |bunk@fs.tum.de, ruedas@geophysik.uni-frankfurt.de bA|terminfo.5 |Ia |bug-ncurses@gnu.org y|tfmtodit.1 | |bug-groff@gnu.org nA|TIFFGetField.3tiff |I |tiff@lists.maptools.org nA|TIFFmemory.3tiff |4 |tiff@lists.maptools.org nA|Tk::Internals.3pm |WY |Ubuntu Developers nA|tnameserv.1 |Wy |openjdk@lists.launchpad.net nA|tgatoppm.1 |A |bryanh@giraffe-data.com 1nA|tidy.1 |Wm |tidy-develop@lists.sourceforge.net,html-tidy@w3.org y|time.1 | |bug-gnu-utils@prep.ai.mit.edu n|top.1 |XoQ |Jim Warner y|transfig.1x | |bvsmith@lbl.gov 1nA|tree.1 |b |Steve Baker nA|ttf2tfm.1 |Io |tex-live@tug.org nA|tty_ioctl.4 |L |mtk-manpages@gmx.net 6nA|tune2fs.8 |C |tytso@thunk.org n|tzfile.5 |I |mtk-manpages@gmx.net y|udevd.8,udevsend.8 | |Kay Sievers y|units.1 | |adrian@cam.cornell.edu nA|unity-2d-shell.1 |CJY |Ubuntu Developers nA|unity-2d-spread.1 |CY |Ubuntu Developers nA|upstart-events.7 |I |James Hunt n|uscan.1 |J |Julian Gilbey nA|usb-creator-gtk.8 |W |Evan Dandrea , Roderick B. 
Greening nA|xz.1,xzcat.1,unxz.1,unlzma.1,lzcat.1,lzma.1|C |lasse.collin@tukaani.org nA|unshare.1 |L |util-linux@vger.kernel.org y|unzip.1,unzipsfx.1 | |roelofs@pobox.com nA|updatedb.conf.5 |J |Miloslav Trmac y|uuencode.1 | |bug-gnu-utils@gnu.org nA|uuencode.1posix |I |Francesco Paolo Lovergine n|vector.3blt |LIG3 |gah@siliconmetrics.com p|viewres.1 |I |xorg@lists.freedesktop.org n*A|vlna.1 |F |tex-live@tug.org y|vmstat.8 | |Henry Ware , Fabian Frédérick 1nA|wall.1 |LO |util-linux@vger.kernel.org n|weechat-curses.1 |s |Sebastien Helleu y|wget.1 | |mtortonesi@ing.unife.it 1nA|whereis.1 |L |bunk@stusta.de y|which.1 | | nA|whois.1 |LY |Ubuntu Developers p|XAddHost.3 |I |xorg@lists.freedesktop.org 1n|XAllocWMHints.3 |I |xorg@lists.freedesktop.org y|Xaw.3x | |xorg@lists.freedesktop.org y|XcmsColor.3x | |xorg@lists.freedesktop.org y|XDrawArc.3x | |xorg@lists.freedesktop.org nA|xdr.3,xdr_array.3 |I |mtk-manpages@gmx.net p|X.7 |ILo |xorg@lists.freedesktop.org y|XQueryColor.3x | |xorg@lists.freedesktop.org y|XLoadFont.3x | |xorg@lists.freedesktop.org y|XrmGetFileDatabase.3x | |xorg@lists.freedesktop.org p|XSizeHints.3 |I |xorg@lists.freedesktop.org p|XAllocClassHint.3 |I |xorg@lists.freedesktop.org p|XAllocIconSize.3 |I |xorg@lists.freedesktop.org p|XAllocSizeHints.3 |I |xorg@lists.freedesktop.org p|XAllocStandardColormap.3 |I |xorg@lists.freedesktop.org p|XAnyEvent.3 |I |xorg@lists.freedesktop.org p|XAutoRepeatOn.3 |I |xorg@lists.freedesktop.org p|XAutoRepeatOff.3 |I |xorg@lists.freedesktop.org p|XBell.3 |I |xorg@lists.freedesktop.org p|XButtonEvent.3 |I |xorg@lists.freedesktop.org p|XChangeGC.3 |I |xorg@lists.freedesktop.org p|XChangeKeyboardControl.3|I |xorg@lists.freedesktop.org p|XChangeKeyboardMapping.3|I |xorg@lists.freedesktop.org p|XCirculateEvent.3 |I |xorg@lists.freedesktop.org p|XCirculateRequestEvent.3 |I |xorg@lists.freedesktop.org p|XClassHint.3 |I |xorg@lists.freedesktop.org p|XClientMessageEvent.3 |I |xorg@lists.freedesktop.org p|XColor.3 |I |xorg@lists.freedesktop.org p|XColormapEvent.3 |I |xorg@lists.freedesktop.org p|XConfigureEvent.3 |I |xorg@lists.freedesktop.org p|XConfigureRequestEvent.3 |I |xorg@lists.freedesktop.org p|XConfigureWindow.3 |I |xorg@lists.freedesktop.org p|XCopyColormapAndFree.3|I |xorg@lists.freedesktop.org p|XCopyGC.3 |I |xorg@lists.freedesktop.org p|XCreateColormap.3 |I |xorg@lists.freedesktop.org p|XCreateGC.3 |I |xorg@lists.freedesktop.org p|XCreateSimpleWindow.3 |I |xorg@lists.freedesktop.org p|XCreateWindow.3 |I |xorg@lists.freedesktop.org p|XCreateWindowEvent.3 |I |xorg@lists.freedesktop.org p|XCrossingEvent.3 |I |xorg@lists.freedesktop.org p|XDefaultString.3 |I |xorg@lists.freedesktop.org p|XDeleteModifiermapEntry.3 |I |xorg@lists.freedesktop.org p|XDestroyWindowEvent.3 |I |xorg@lists.freedesktop.org p|XDisableAccessControl.3 |I |xorg@lists.freedesktop.org p|XDisplayKeycodes.3 |I |xorg@lists.freedesktop.org p|XDisplayMotionBufferSize.3 |I |xorg@lists.freedesktop.org p|XDrawLine.3 |I |xorg@lists.freedesktop.org p|XDrawLines.3 |I |xorg@lists.freedesktop.org p|XDrawPoint.3 |I |xorg@lists.freedesktop.org p|XDrawPoints.3 |I |xorg@lists.freedesktop.org p|XDrawRectangle.3 |I |xorg@lists.freedesktop.org p|XDrawRectangles.3 |I |xorg@lists.freedesktop.org p|XDrawSegments.3 |I |xorg@lists.freedesktop.org p|XDrawText.3 |I |xorg@lists.freedesktop.org p|XDrawText16.3 |I |xorg@lists.freedesktop.org p|XEnableAccessControl.3|I |xorg@lists.freedesktop.org p|XErrorEvent.3 |I |xorg@lists.freedesktop.org p|XEvent.3 |I |xorg@lists.freedesktop.org p|XExposeEvent.3 |I 
|xorg@lists.freedesktop.org p|XFocusChangeEvent.3 |I |xorg@lists.freedesktop.org p|XFontSetExtents.3 |I |xorg@lists.freedesktop.org p|XFreeColormap.3 |I |xorg@lists.freedesktop.org p|XFreeEventData.3 |I |xorg@lists.freedesktop.org p|XFreeGC.3 |I |xorg@lists.freedesktop.org p|XFreeModifiermap.3 |I |xorg@lists.freedesktop.org p|XFreeStringList.3 |I |xorg@lists.freedesktop.org n|XF86VM.3 |I |xorg@lists.freedesktop.org p|XGCValues.3 |I |xorg@lists.freedesktop.org p|XGContextFromGC.3 |I |xorg@lists.freedesktop.org p|XGenericEventCookie.3 |I |xorg@lists.freedesktop.org p|XGetClassHint.3 |I |xorg@lists.freedesktop.org p|XGetEventData.3 |I |xorg@lists.freedesktop.org p|XGetGCValues.3 |I |xorg@lists.freedesktop.org p|XGetGeometry.3 |I |xorg@lists.freedesktop.org p|XGetIconSizes.3 |I |xorg@lists.freedesktop.org p|XGetKeyboardControl.3 |I |xorg@lists.freedesktop.org p|XGetKeyboardMapping.3 |I |xorg@lists.freedesktop.org p|XGetModifierMapping.3 |I |xorg@lists.freedesktop.org p|XGetMotionEvents.3 |I |xorg@lists.freedesktop.org p|XGetRGBColormaps.3 |I |xorg@lists.freedesktop.org p|XGetVisualInfo.3 |I |xorg@lists.freedesktop.org p|XGetWMHints.3 |I |xorg@lists.freedesktop.org p|XGetWMNormalHints.3 |I |xorg@lists.freedesktop.org p|XGetWMSizeHints.3 |I |xorg@lists.freedesktop.org p|XGetWindowAttributes.3|I |xorg@lists.freedesktop.org n|XGetXCBConnection.3,XSetEventQueueOwner.3 |X |xcb@lists.freedesktop.org p|XGraphicsExposeEvent.3|I |xorg@lists.freedesktop.org p|XGravityEvent.3 |I |xorg@lists.freedesktop.org p|XHostAddress.3 |I |xorg@lists.freedesktop.org p|XIconSize.3 |I |xorg@lists.freedesktop.org p|XInsertModifiermapEntry.3 |I |xorg@lists.freedesktop.org p|XKeyEvent.3 |I |xorg@lists.freedesktop.org p|XKeyboardControl.3 |I |xorg@lists.freedesktop.org p|XKeymapEvent.3 |I |xorg@lists.freedesktop.org p|XListHosts.3 |I |xorg@lists.freedesktop.org p|XListPixmapFormats.3 |I |xorg@lists.freedesktop.org p|XMapEvent.3 |I |xorg@lists.freedesktop.org p|XMapRequestEvent.3 |I |xorg@lists.freedesktop.org p|XMappingEvent.3 |I |xorg@lists.freedesktop.org p|XMatchVisualInfo.3 |I |xorg@lists.freedesktop.org p|XModifierKeymap.3 |I |xorg@lists.freedesktop.org p|XMotionEvent.3 |I |xorg@lists.freedesktop.org p|XMoveResizeWindow.3 |I |xorg@lists.freedesktop.org p|XMoveWindow.3 |I |xorg@lists.freedesktop.org p|XNewModifiermap.3 |I |xorg@lists.freedesktop.org p|XNoExposeEvent.3 |I |xorg@lists.freedesktop.org p|XPixmapFormatValues.3 |I |xorg@lists.freedesktop.org p|XPoint.3 |I |xorg@lists.freedesktop.org p|XPropertyEvent.3 |I |xorg@lists.freedesktop.org p|XQueryKeymap.3 |I |xorg@lists.freedesktop.org p|XRectangle.3 |I |xorg@lists.freedesktop.org p|XRemoveHost.3 |I |xorg@lists.freedesktop.org p|XRemoveHosts.3 |I |xorg@lists.freedesktop.org p|XReparentEvent.3 |I |xorg@lists.freedesktop.org p|XResizeRequestEvent.3 |I |xorg@lists.freedesktop.org p|XResizeWindow.3 |I |xorg@lists.freedesktop.org p|XSegment.3 |I |xorg@lists.freedesktop.org p|XSelectionClearEvent.3|I |xorg@lists.freedesktop.org p|XSelectionEvent.3 |I |xorg@lists.freedesktop.org p|XSelectionRequestEvent.3 |I |xorg@lists.freedesktop.org p|XSendEvent.3 |I |xorg@lists.freedesktop.org p|XSetAccessControl.3 |I |xorg@lists.freedesktop.org p|XSetClassHint.3 |I |xorg@lists.freedesktop.org p|XSetIconSizes.3 |I |xorg@lists.freedesktop.org p|XSetModifierMapping.3 |I |xorg@lists.freedesktop.org p|XSetRGBColormaps.3 |I |xorg@lists.freedesktop.org p|XSetWMHints.3 |I |xorg@lists.freedesktop.org p|XSetWMNormalHints.3 |I |xorg@lists.freedesktop.org p|XSetWMSizeHints.3 |I 
|xorg@lists.freedesktop.org p|XSetWindowAttributes.3 |I |xorg@lists.freedesktop.org p|XSetWindowBorderWidth.3 |I |xorg@lists.freedesktop.org p|XShape.3 |Iu |xorg@lists.freedesktop.org p|XShapeCombineMask.3 |Iu |xorg@lists.freedesktop.org p|XShapeCombineRectangles.3 |Iu |xorg@lists.freedesktop.org p|XShapeCombineRegion.3 |Iu |xorg@lists.freedesktop.org p|XShapeCombineShape.3 |Iu |xorg@lists.freedesktop.org p|XShapeGetRectangles.3 |Iu |xorg@lists.freedesktop.org p|XShapeInputSelected.3 |Iu |xorg@lists.freedesktop.org p|XShapeOffsetShape.3 |Iu |xorg@lists.freedesktop.org p|XShapeQueryExtension.3|Iu |xorg@lists.freedesktop.org p|XShapeQueryExtents.3 |Iu |xorg@lists.freedesktop.org p|XShapeQueryVersion.3 |Iu |xorg@lists.freedesktop.org p|XShapeSelectInput.3 |Iu |xorg@lists.freedesktop.org p|XStandardColormap.3 |I |xorg@lists.freedesktop.org p|XStringListToTextProperty.3 |I |xorg@lists.freedesktop.org p|XTextItem.3 |I |xorg@lists.freedesktop.org p|XTextItem16.3 |I |xorg@lists.freedesktop.org p|XTextProperty.3 |I |xorg@lists.freedesktop.org p|XTextPropertyToStringList.3 |I |xorg@lists.freedesktop.org p|XTimeCoord.3 |I |xorg@lists.freedesktop.org p|XUnmapEvent.3 |I |xorg@lists.freedesktop.org p|XVisibilityEvent.3 |I |xorg@lists.freedesktop.org p|XVisualIDFromVisual.3 |I |xorg@lists.freedesktop.org p|XVisualInfo.3 |I |xorg@lists.freedesktop.org p|XWMHints.3 |I |xorg@lists.freedesktop.org p|XWindowAttributes.3 |I |xorg@lists.freedesktop.org p|XWindowChanges.3 |I |xorg@lists.freedesktop.org p|Xau.3 |I |xorg@lists.freedesktop.org p|XkbGetNamedGeometry.3 |I |xorg@lists.freedesktop.org p|XkbSASetGroup.3 |I |xorg@lists.freedesktop.org p|XkbSetDetectableAutoRepeat.3 |I |xorg@lists.freedesktop.org p|XkbSetDeviceButtonActions.3 |I |xorg@lists.freedesktop.org p|XmbDrawText.3 |I |xorg@lists.freedesktop.org p|XmbTextListToTextProperty.3 |I |xorg@lists.freedesktop.org p|XmbTextPropertyToTextList.3 |I |xorg@lists.freedesktop.org p|Xmbuf.3 |I |xorg@lists.freedesktop.org p|XmbufChangeBufferAttributes.3 |I |xorg@lists.freedesktop.org p|XmbufChangeWindowAttributes.3 |I |xorg@lists.freedesktop.org p|XmbufCreateBuffers.3 |I |xorg@lists.freedesktop.org p|XmbufCreateStereoWindow.3 |I |xorg@lists.freedesktop.org p|XmbufDestroyBuffers.3 |I |xorg@lists.freedesktop.org p|XmbufDisplayBuffers.3 |I |xorg@lists.freedesktop.org p|XmbufGetBufferAttributes.3 |I |xorg@lists.freedesktop.org p|XmbufGetScreenInfo.3 |I |xorg@lists.freedesktop.org p|XmbufGetVersion.3 |I |xorg@lists.freedesktop.org p|XmbufGetWindowAttributes.3 |I |xorg@lists.freedesktop.org p|XmbufQueryExtension.3 |I |xorg@lists.freedesktop.org p|XrmEnumerateDatabase.3 |I |xorg@lists.freedesktop.org p|XrmInitialize.3 |I |xorg@lists.freedesktop.org p|XrmOptionDescRec.3 |I |xorg@lists.freedesktop.org p|XrmOptionKind.3 |I |xorg@lists.freedesktop.org p|XrmParseCommand.3 |I |xorg@lists.freedesktop.org p|XrmValue.3 |I |xorg@lists.freedesktop.org p|XtPopdown.3,XtCallbackPopdown.3,MenuPopdown.3 |I |xorg@lists.freedesktop.org p|XtSetArg.3,XtMergeArgLists.3 |I |xorg@lists.freedesktop.org p|Xutf8DrawText.3 |I |xorg@lists.freedesktop.org p|Xutf8TextListToTextProperty.3 |I |xorg@lists.freedesktop.org p|Xutf8TextPropertyToTextList.3 |I |xorg@lists.freedesktop.org p|XwcDrawText.3 |I |xorg@lists.freedesktop.org p|XwcFreeStringList.3 |I |xorg@lists.freedesktop.org p|XwcTextListToTextProperty.3 |I |xorg@lists.freedesktop.org p|XwcTextPropertyToTextList.3 |I |xorg@lists.freedesktop.org y|XrmUniqueQuark.3x | |xorg@lists.freedesktop.org y|Xnest.1x | |xorg@lists.freedesktop.org 
y|x11perf.1x,x11perfcomp.1x| |xorg@lists.freedesktop.org p|xcalc.1 |Io |xorg@lists.freedesktop.org p|xclipboard.1 |I |xorg@lists.freedesktop.org p|xclock.1 |I |xorg@lists.freedesktop.org p|xconsole.1 |I |xorg@lists.freedesktop.org nA|xdr.3 |I |mtk-manpages@gmx.net nA|xdr_accepted_reply.3 |I |mtk-manpages@gmx.net nA|xdr_array.3 |I |mtk-manpages@gmx.net nA|xdr_authunix_parms.3 |I |mtk-manpages@gmx.net nA|xdr_bool.3 |I |mtk-manpages@gmx.net nA|xdr_bytes.3 |I |mtk-manpages@gmx.net nA|xdr_callhdr.3 |I |mtk-manpages@gmx.net nA|xdr_callmsg.3 |I |mtk-manpages@gmx.net nA|xdr_char.3 |I |mtk-manpages@gmx.net nA|xdr_destroy.3 |I |mtk-manpages@gmx.net nA|xdr_double.3 |I |mtk-manpages@gmx.net nA|xdr_enum.3 |I |mtk-manpages@gmx.net nA|xdr_float.3 |I |mtk-manpages@gmx.net nA|xdr_free.3 |I |mtk-manpages@gmx.net nA|xdr_getpos.3 |I |mtk-manpages@gmx.net nA|xdr_inline.3 |I |mtk-manpages@gmx.net nA|xdr_int.3 |I |mtk-manpages@gmx.net nA|xdr_long.3 |I |mtk-manpages@gmx.net nA|xdr_opaque.3 |I |mtk-manpages@gmx.net nA|xdr_opaque_auth.3 |I |mtk-manpages@gmx.net nA|xdr_pmap.3 |I |mtk-manpages@gmx.net nA|xdr_pmaplist.3 |I |mtk-manpages@gmx.net nA|xdr_pointer.3 |I |mtk-manpages@gmx.net nA|xdr_reference.3 |I |mtk-manpages@gmx.net nA|xdr_rejected_reply.3 |I |mtk-manpages@gmx.net nA|xdr_replymsg.3 |I |mtk-manpages@gmx.net nA|xdr_setpos.3 |I |mtk-manpages@gmx.net nA|xdr_short.3 |I |mtk-manpages@gmx.net nA|xdr_string.3 |I |mtk-manpages@gmx.net nA|xdr_u_char.3 |I |mtk-manpages@gmx.net nA|xdr_u_int.3 |I |mtk-manpages@gmx.net nA|xdr_u_long.3 |I |mtk-manpages@gmx.net nA|xdr_u_short.3 |I |mtk-manpages@gmx.net nA|xdr_union.3 |I |mtk-manpages@gmx.net nA|xdr_vector.3 |I |mtk-manpages@gmx.net nA|xdr_void.3 |I |mtk-manpages@gmx.net nA|xdr_wrapstring.3 |I |mtk-manpages@gmx.net nA|xdrmem_create.3 |I |mtk-manpages@gmx.net nA|xdrrec_create.3 |I |mtk-manpages@gmx.net nA|xdrrec_endofrecord.3 |I |mtk-manpages@gmx.net nA|xdrrec_eof.3 |I |mtk-manpages@gmx.net nA|xdrrec_skiprecord.3 |I |mtk-manpages@gmx.net nA|xdrstdio_create.3 |I |mtk-manpages@gmx.net p|xedit.1 |Io |xorg@lists.freedesktop.org 1n|xfd.1 |o |xorg@lists.freedesktop.org y|xfontsel.1x,xlsfonts.1x | |xorg@lists.freedesktop.org 1n|xkbevd.1 |J |xorg@lists.freedesktop.org p|xload.1 |I |xorg@lists.freedesktop.org p|xlogo.1 |I |xorg@lists.freedesktop.org p|xman.1 |Io |xorg@lists.freedesktop.org y|Xmark.1x | |xorg@lists.freedesktop.org y|xminicom.1 | |miquels@cistron.nl y|xml_pp.1 | |Michel Rodriguez y|xml_spellcheck.1 | |Michel Rodriguez gA|xmlto.1 |I |tim@cyberelk.net 1p|xorg.conf.5,xorg.conf.d.5|Lu |xorg@lists.freedesktop.org nA|xprt_register.3 |I |mtk-manpages@gmx.net nA|xprt_unregister.3 |I |mtk-manpages@gmx.net y|xrdb.1x | |xorg@lists.freedesktop.org y|xrandr.1 | |xorg@lists.freedesktop.org p|Xsecurity.7 |W |xorg@lists.freedesktop.org n|Xserver.1 |J |xorg@lists.freedesktop.org p|XStandards.7 |H |xorg@lists.freedesktop.org 1n|xterm.1 |LI |xorg@lists.freedesktop.org y|XQueryExtension.3x | |xorg@lists.freedesktop.org y|xscreensaver-text.1 | |jwz@jwz.org y|xset.1x | |xorg@lists.freedesktop.org y|xsltproc.1 | |xslt@gnome.org y|xtotroff.1 | |bug-groff@gnu.org nA|zic.8 |I |patches@eglibc.org pA|zip.1 |J |Info-ZIP-Dev@goatley.com nA|zipinfo.1 |* |newt@pobox.com pA|zipcloak.1,zipnote.1,zipsplit.1|I |Info-ZIP-Dev@goatley.com nA|zlib.3 |C |zlib@gzip.org doclifter-2.11/tests/0000775000175000017500000000000012152465736012733 5ustar esresrdoclifter-2.11/tests/docliftertest1.man0000664000175000017500000001501212152465736016363 0ustar esresr.\" Test load for doclifter .TH docliftertest1 1 .SH NAME 
docliftertest1 \- section 1 test load for doclifter .SH SYNOPSIS \fBdocliftertest1\fR [-a | -b] [\fIoptional...\fR] \fBdocliftertest1\fR -c \fI\fP \fBdocliftertest1\fR -d [ .B optional ] \fBdocliftertest1\fR [ -e | -f foo ] ... .SH DESCRIPTION This file is a test load for doclifter, intended to exercise as much as possible of its translation capability. You are now reading the last sentence of an ordinary paragraph; by inspecting the output, you can check that your formatter is generating a correct beginning-of-body even after the section title, and an end-of-body event at the end of the paragraph. .PP This is an ordinary paragraph started by a \fB.PP\fR macro. A second line illustrates the effect of filling. .PP This .B word should be bold. This .SM word should be small. The word .SM ASCII is actually an acronym. This is a reference to section: .SM SEE ALSO it should be a link now. Visiting the .SM SYNOPSIS is important. While the .SM SYNOPYOSIS is not important and doesn't exist. .IR This sentence should alternate italic and bold. The words in the last sentence should have been run together. .LP This is an ordinary paragraph started by a \fB.LP\fR macro. A second line illustrates the effect of filling. .HP 5 This is a paragraph started by an \fB.HP\fR macro. We translate it to DocBook as an ordinary paragraph break. .IP & 5 This paragraph was led with \fB.IP & 5\fP. A sample line to see how it formats -- it should turn into list markup. .PP There should be an index entry generated right after this sentence. .IX Item And right before this one. .IP 5 This paragraph was led with \fB.IP 5\fP. This should turn into an ordinary paragraph. .PP This paragraph contains a URL, http://www.google.com, that doesn't have explicit \fB.UR\fP/\fB.UN\fR tags around it. It should not be marked up, because \fB.UR\fP/\fB.UN\fR tags exist in this document. .IP \(bu This is the first item in a bulleted list. .IP \(bu This is the second item in a bulleted list. .IP \(bu This is the third item in a bulleted list. .PP This is another ordinary paragraph. It's going to be immediately followed (without an intervening paragraph tag) by a table example lifted straight from Mike Lesk's original tbl paper: .TS center, box; c s s s c s s s c |c |c |c c |c |c |c l |n |n |n. 1970 Federal Budget Transfers \s-2(in billions of dollars)\s0 = State Taxes Money Net \^ collected spent \^ _ New York 22.91 21.35 \-1.56 New Jersey 8.33 6.96 \-1.37 Connecticut 4.12 3.10 \-1.02 Maine 0.74 0.67 \-0.07 California 22.29 22.42 +0.13 New Mexico 0.70 1.49 +0.79 Georgia 3.30 4.28 +0.98 Mississippi 1.15 2.32 +1.17 Texas 9.33 11.13 +1.80 .TE In the above table, the presence or absence of cell borders may not be exactly as .BR tbl (1) specified them (the DocBook DSSL toolchain sets BORDER=1 if there is any frame attribute, which is wrong; according to the DocBook specification, the frame attribute should only control box drawing around the exterior of the table). But the horizontal spanning and centering should be displayed properly. .SS MORE TABLES We just started a subsection. .P Here's another table. The first line of the table contains a heading centered across all three columns; each remaining line contains a left-adjusted item in the first column followed by two columns of numerical data. (The numerical alignment won't translate into DocBook.) .TS c s s l n n. 
Overall title Item-a 34.22 9.1 Item-b 12.65 .02 Items: c,d,e 23 5.8 Total 69.87 14.92 .TE This table illustrates the effect of the \fBexpand\fR option: .TS expand; c s s s c c c c l l n n. Bell Labs Locations Name Address Area Code Phone Holmdel Holmdel, N. J. 07733 201 949-3000 Murray Hill Murray Hill, N. J. 07974 201 582-6377 Whippany Whippany, N. J. 07981 201 386-3000 Indian Hill Naperville, Illinois 60540 312 690-2000 .TE Here's a really gnarly table with a lot of vertically spanned content and several multiline items per line. However this is not done with a vertically-spanned format; for that, see the next example. .TS box; cb s s s c | c | c s ltiw(1i) | ltw(2i) | lp8| lw(1.6i)p8. Some Interesting Places _ Name Description Practical Information _ T{ American Museum of Natural History T} T{ The collections fill 11.5 acres (Michelin) or 25 acres (MTA) of exhibition halls on four floors. There is a full-sized replica of a blue whale and the world's largest star sapphire (stolen in 1964). T} Hours 10-5, ex. Sun 11-5, Wed. to 9 \^ \^ Location T{ Central Park West & 79th St. T} \^ \^ Admission Donation: $1.00 asked \^ \^ Subway AA to 81st St. \^ \^ Telephone 212-873-4225 _ Bronx Zoo T{ About a mile long and .6 mile wide, this is the largest zoo in America. A lion eats 18 pounds of meat a day while a sea lion eats 15 pounds of fish. T} Hours T{ 10-4:30 winter, to 5:00 summer T} \^ \^ Location T{ 185th St. & Southern Blvd, the Bronx. T} \^ \^ Admission $1.00, but Tu,We,Th free \^ \^ Subway 2, 5 to East Tremont Ave. \^ \^ Telephone 212-933-1759 _ Brooklyn Museum T{ Five floors of galleries contain American and ancient art. There are American period rooms and architectural ornaments saved from wreckers, such as a classical figure from Pennsylvania Station. T} Hours Wed-Sat, 10-5, Sun 12-5 \^ \^ Location T{ Eastern Parkway & Washington Ave., Brooklyn. T} \^ \^ Admission Free \^ \^ Subway 2,3 to Eastern Parkway. \^ \^ Telephone 212-638-5000 _ T{ New-York Historical Society T} T{ All the original paintings for Audubon's .I Birds of America are here, as are exhibits of American decorative arts, New York history, Hudson River school paintings, carriages, and glass paperweights. T} Hours T{ Tues-Fri & Sun, 1-5; Sat 10-5 T} \^ \^ Location T{ Central Park West & 77th St. T} \^ \^ Admission Free \^ \^ Subway AA to 81st St. \^ \^ Telephone 212-873-3400 .TE OK, here is a table example with spanned vertical format. It illustrates the vertical-spanning bug noted on the .BR doclifter (1) manual page (but .BR troff2docbook (1) translates this table correctly). If the translation were completely correct, the "E" entry would span one row further downward. .TS allbox; l l l l l l l ^ l. A B C _ D E F G H I J .TE .P Now we'll test PIC translation to SVG. .PS box "box" .PE This line tests recognition of \v'-.4m'\fIsuperscripting\fR\v'.4m') ,br This line tests recognition of the \uother\d superscript idiom. .SH FILES The following items illustrate \fB.TP\fR markup: .TP 5 ${HOME}/.profile read at startup by .BR sh (1). .TP /etc/hosts list of static host addresses used by the \fIbind\fR(8) library. .SH SEE ALSO ls(1), .IR mkdir (1). 
.\" End doclifter-2.11/tests/console_ioctl.man0000664000175000017500000004630312152465736016272 0ustar esresr.\" Copyright (c) 1995 Jim Van Zandt and aeb .\" Sun Feb 26 11:46:23 MET 1995 .\" .\" This is free documentation; you can redistribute it and/or .\" modify it under the terms of the GNU General Public License as .\" published by the Free Software Foundation; either version 2 of .\" the License, or (at your option) any later version. .\" .\" The GNU General Public License's references to "object code" .\" and "executables" are to be interpreted as the output of any .\" document formatting or typesetting system, including .\" intermediate and printed output. .\" .\" This manual is distributed in the hope that it will be useful, .\" but WITHOUT ANY WARRANTY; without even the implied warranty of .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the .\" GNU General Public License for more details. .\" .\" You should have received a copy of the GNU General Public .\" License along with this manual; if not, write to the Free .\" Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, .\" USA. .\" .\" Modified, Sun Feb 26 15:04:20 1995, faith@cs.unc.edu .\" Modified, Thu Apr 20 22:08:17 1995, jrv@vanzandt.mv.com .\" Modified, Mon Sep 18 22:32:47 1995, hpa@storm.net (H. Peter Anvin) .\" FIXME The following are not documented: .\" KDFONTOP (since 2.1.111) .\" KDGKBDIACRUC (since 2.6.24) .\" KDSKBDIACR .\" KDSKBDIACRUC (since 2.6.24) .\" KDKBDREP (since 2.1.113) .\" KDMAPDISP (not implemented as at 2.6.27) .\" KDUNMAPDISP (not implemented as at 2.6.27) .\" VT_LOCKSWITCH (since 1.3.47, needs CAP_SYS_TTY_CONFIG) .\" VT_UNLOCKSWITCH (since 1.3.47, needs CAP_SYS_TTY_CONFIG) .\" VT_GETHIFONTMASK (since 2.6.18) .\" .TH CONSOLE_IOCTL 4 2009-02-28 "Linux" "Linux Programmer's Manual" .SH NAME console_ioctl \- ioctl's for console terminal and virtual consoles .SH DESCRIPTION The following Linux-specific .BR ioctl (2) requests are supported. Each requires a third argument, assumed here to be \fIargp\fP. .IP \fBKDGETLED\fP Get state of LEDs. \fIargp\fP points to a \fIchar\fP. The lower three bits of \fI*argp\fP are set to the state of the LEDs, as follows: .TS l l l. LED_CAP 0x04 caps lock led LEC_NUM 0x02 num lock led LED_SCR 0x01 scroll lock led .TE .IP \fBKDSETLED\fP Set the LEDs. The LEDs are set to correspond to the lower three bits of \fIargp\fP. However, if a higher order bit is set, the LEDs revert to normal: displaying the state of the keyboard functions of caps lock, num lock, and scroll lock. .LP Before 1.1.54, the LEDs just reflected the state of the corresponding keyboard flags, and KDGETLED/KDSETLED would also change the keyboard flags. Since 1.1.54 the leds can be made to display arbitrary information, but by default they display the keyboard flags. The following two ioctl's are used to access the keyboard flags. .IP \fBKDGKBLED\fP Get keyboard flags CapsLock, NumLock, ScrollLock (not lights). \fIargp\fP points to a char which is set to the flag state. The low order three bits (mask 0x7) get the current flag state, and the low order bits of the next nibble (mask 0x70) get the default flag state. (Since 1.1.54.) .IP \fBKDSKBLED\fP Set keyboard flags CapsLock, NumLock, ScrollLock (not lights). \fIargp\fP has the desired flag state. The low order three bits (mask 0x7) have the flag state, and the low order bits of the next nibble (mask 0x70) have the default flag state. (Since 1.1.54.) .IP \fBKDGKBTYPE\fP Get keyboard type. 
This returns the value KB_101, defined as 0x02. .IP \fBKDADDIO\fP Add I/O port as valid. Equivalent to \fIioperm(arg,1,1)\fP. .IP \fBKDDELIO\fP Delete I/O port as valid. Equivalent to \fIioperm(arg,1,0)\fP. .IP \fBKDENABIO\fP Enable I/O to video board. Equivalent to \fIioperm(0x3b4, 0x3df-0x3b4+1, 1)\fP. .IP \fBKDDISABIO\fP Disable I/O to video board. Equivalent to \fIioperm(0x3b4, 0x3df-0x3b4+1, 0)\fP. .IP \fBKDSETMODE\fP Set text/graphics mode. \fIargp\fP is one of these: .TS l l. KD_TEXT 0x00 KD_GRAPHICS 0x01 .TE .IP \fBKDGETMODE\fP Get text/graphics mode. \fIargp\fP points to a \fIlong\fP which is set to one of the above values. .IP \fBKDMKTONE\fP Generate tone of specified length. The lower 16 bits of \fIargp\fP specify the period in clock cycles, and the upper 16 bits give the duration in msec. If the duration is zero, the sound is turned off. Control returns immediately. For example, \fIargp\fP = (125<<16) + 0x637 would specify the beep normally associated with a ctrl-G. (Thus since 0.99pl1; broken in 2.1.49-50.) .IP \fBKIOCSOUND\fP Start or stop sound generation. The lower 16 bits of \fIargp\fP specify the period in clock cycles (that is, \fIargp\fP = 1193180/frequency). \fIargp\fP = 0 turns sound off. In either case, control returns immediately. .IP \fBGIO_CMAP\fP Get the current default color map from kernel. \fIargp\fP points to a 48-byte array. (Since 1.3.3.) .IP \fBPIO_CMAP\fP Change the default text-mode color map. \fIargp\fP points to a 48-byte array which contains, in order, the Red, Green, and Blue values for the 16 available screen colors: 0 is off, and 255 is full intensity. The default colors are, in order: black, dark red, dark green, brown, dark blue, dark purple, dark cyan, light grey, dark grey, bright red, bright green, yellow, bright blue, bright purple, bright cyan and white. (Since 1.3.3.) .IP \fBGIO_FONT\fP Gets 256-character screen font in expanded form. \fIargp\fP points to an 8192 byte array. Fails with error code \fBEINVAL\fP if the currently loaded font is a 512-character font, or if the console is not in text mode. .IP \fBGIO_FONTX\fP Gets screen font and associated information. \fIargp\fP points to a \fIstruct consolefontdesc\fP (see \fBPIO_FONTX\fP). On call, the \fIcharcount\fP field should be set to the maximum number of characters that would fit in the buffer pointed to by \fIchardata\fP. On return, the \fIcharcount\fP and \fIcharheight\fP are filled with the respective data for the currently loaded font, and the \fIchardata\fP array contains the font data if the initial value of \fIcharcount\fP indicated enough space was available; otherwise the buffer is untouched and \fIerrno\fP is set to \fBENOMEM\fP. (Since 1.3.1.) .IP \fBPIO_FONT\fP Sets 256-character screen font. Load font into the EGA/VGA character generator. \fIargp\fP points to a 8192 byte map, with 32 bytes per character. Only first \fIN\fP of them are used for an 8x\fIN\fP font (0 < \fIN\fP <= 32). This call also invalidates the Unicode mapping. .IP \fBPIO_FONTX\fP Sets screen font and associated rendering information. \fIargp\fP points to a .RS .nf .ft CW struct consolefontdesc { unsigned short charcount; /* characters in font (256 or 512) */ unsigned short charheight; /* scan lines per character (1-32) */ char *chardata; /* font data in expanded form */ }; .ft .fi .RE If necessary, the screen will be appropriately resized, and \fBSIGWINCH\fP sent to the appropriate processes. This call also invalidates the Unicode mapping. (Since 1.3.1.) 
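.PP
The fragment below is an illustrative sketch only, not part of the original
interface description: it shows one plausible way to drive the two-step
buffer negotiation of
.B GIO_FONTX
from C.
The device path, the 512-character buffer size, and the minimal error
handling are assumptions; the call may also require sufficient privilege
on a virtual console.
.RS
.nf
.ft CW
/* Hypothetical sketch: query the loaded console font via GIO_FONTX.
   Assumes <linux/kd.h> declares GIO_FONTX and struct consolefontdesc. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kd.h>

int main(void)
{
    struct consolefontdesc cfd;
    int fd = open("/dev/tty0", O_RDONLY);   /* assumed console device */

    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* Advertise room for the largest possible font: 512 characters,
       each with up to 32 bytes of bitmap data. */
    cfd.charcount = 512;
    cfd.chardata = malloc(512 * 32);
    if (cfd.chardata == NULL) {
        close(fd);
        return 1;
    }
    if (ioctl(fd, GIO_FONTX, &cfd) < 0)
        perror("GIO_FONTX");   /* e.g. ENOMEM if charcount was too small */
    /* On success, cfd.charcount and cfd.charheight describe the currently
       loaded font, and cfd.chardata holds its expanded bitmap data. */
    free(cfd.chardata);
    close(fd);
    return 0;
}
.ft
.fi
.RE
.PP
Since 512 is the documented maximum font size, the
.B ENOMEM
case described above should not arise with this buffer; a program passing a
smaller \fIcharcount\fP guess must be prepared to handle it.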
.IP \fBPIO_FONTRESET\fP Resets the screen font, size and Unicode mapping to the bootup defaults. \fIargp\fP is unused, but should be set to NULL to ensure compatibility with future versions of Linux. (Since 1.3.28.) .IP \fBGIO_SCRNMAP\fP Get screen mapping from kernel. \fIargp\fP points to an area of size E_TABSZ, which is loaded with the font positions used to display each character. This call is likely to return useless information if the currently loaded font is more than 256 characters. .IP \fBGIO_UNISCRNMAP\fP Get full Unicode screen mapping from kernel. \fIargp\fP points to an area of size E_TABSZ*sizeof(unsigned short), which is loaded with the Unicodes each character represent. A special set of Unicodes, starting at U+F000, are used to represent "direct to font" mappings. (Since 1.3.1.) .IP \fBPIO_SCRNMAP\fP Loads the "user definable" (fourth) table in the kernel which maps bytes into console screen symbols. \fIargp\fP points to an area of size E_TABSZ. .IP \fBPIO_UNISCRNMAP\fP Loads the "user definable" (fourth) table in the kernel which maps bytes into Unicodes, which are then translated into screen symbols according to the currently loaded Unicode-to-font map. Special Unicodes starting at U+F000 can be used to map directly to the font symbols. (Since 1.3.1.) .IP \fBGIO_UNIMAP\fP Get Unicode-to-font mapping from kernel. \fIargp\fP points to a .RS .nf .ft CW struct unimapdesc { unsigned short entry_ct; struct unipair *entries; }; .ft .fi .RE where \fIentries\fP points to an array of .RS .nf .ft CW struct unipair { unsigned short unicode; unsigned short fontpos; }; .ft .fi .RE (Since 1.1.92.) .IP \fBPIO_UNIMAP\fP Put unicode-to-font mapping in kernel. \fIargp\fP points to a \fIstruct unimapdesc\fP. (Since 1.1.92) .IP \fBPIO_UNIMAPCLR\fP Clear table, possibly advise hash algorithm. \fIargp\fP points to a .RS .nf .ft CW struct unimapinit { unsigned short advised_hashsize; /* 0 if no opinion */ unsigned short advised_hashstep; /* 0 if no opinion */ unsigned short advised_hashlevel; /* 0 if no opinion */ }; .ft .fi .RE (Since 1.1.92.) .IP \fBKDGKBMODE\fP Gets current keyboard mode. \fIargp\fP points to a \fIlong\fP which is set to one of these: .TS l l. K_RAW 0x00 K_XLATE 0x01 K_MEDIUMRAW 0x02 K_UNICODE 0x03 .TE .IP \fBKDSKBMODE\fP Sets current keyboard mode. \fIargp\fP is a \fIlong\fP equal to one of the above values. .IP \fBKDGKBMETA\fP Gets meta key handling mode. \fIargp\fP points to a \fIlong\fP which is set to one of these: .TS l l l. K_METABIT 0x03 set high order bit K_ESCPREFIX 0x04 escape prefix .TE .IP \fBKDSKBMETA\fP Sets meta key handling mode. \fIargp\fP is a \fIlong\fP equal to one of the above values. .IP \fBKDGKBENT\fP Gets one entry in key translation table (keycode to action code). \fIargp\fP points to a .RS .nf .ft CW struct kbentry { unsigned char kb_table; unsigned char kb_index; unsigned short kb_value; }; .ft .fi .RE with the first two members filled in: \fIkb_table\fP selects the key table (0 <= \fIkb_table\fP < MAX_NR_KEYMAPS), and \fIkb_index\fP is the keycode (0 <= \fIkb_index\fP < NR_KEYS). \fIkb_value\fP is set to the corresponding action code, or K_HOLE if there is no such key, or K_NOSUCHMAP if \fIkb_table\fP is invalid. .IP \fBKDSKBENT\fP Sets one entry in translation table. \fIargp\fP points to a \fIstruct kbentry\fP. .IP \fBKDGKBSENT\fP Gets one function key string. 
\fIargp\fP points to a .RS .nf .ft CW struct kbsentry { unsigned char kb_func; unsigned char kb_string[512]; }; .ft .fi .RE \fIkb_string\fP is set to the (null-terminated) string corresponding to the \fIkb_func\fPth function key action code. .IP \fBKDSKBSENT\fP Sets one function key string entry. \fIargp\fP points to a \fIstruct kbsentry\fP. .IP \fBKDGKBDIACR\fP Read kernel accent table. \fIargp\fP points to a .RS .nf .ft CW struct kbdiacrs { unsigned int kb_cnt; struct kbdiacr kbdiacr[256]; }; .ft .fi .RE where \fIkb_cnt\fP is the number of entries in the array, each of which is a .RS .nf .ft CW struct kbdiacr { unsigned char diacr; unsigned char base; unsigned char result; }; .ft .fi .RE .IP \fBKDGETKEYCODE\fP Read kernel keycode table entry (scan code to keycode). \fIargp\fP points to a .RS .nf .ft CW struct kbkeycode { unsigned int scancode; unsigned int keycode; }; .ft .fi .RE \fIkeycode\fP is set to correspond to the given \fIscancode\fP. (89 <= \fIscancode\fP <= 255 only. For 1 <= \fIscancode\fP <= 88, \fIkeycode\fP==\fIscancode\fP.) (Since 1.1.63.) .IP \fBKDSETKEYCODE\fP Write kernel keycode table entry. \fIargp\fP points to a \fIstruct kbkeycode\fP. (Since 1.1.63.) .IP \fBKDSIGACCEPT\fP The calling process indicates its willingness to accept the signal \fIargp\fP when it is generated by pressing an appropriate key combination. (1 <= \fIargp\fP <= NSIG). (See spawn_console() in linux/drivers/char/keyboard.c.) .IP \fBVT_OPENQRY\fP Returns the first available (non-opened) console. \fIargp\fP points to an \fIint\fP which is set to the number of the vt (1 <= \fI*argp\fP <= MAX_NR_CONSOLES). .IP \fBVT_GETMODE\fP Get mode of active vt. \fIargp\fP points to a .RS .nf .ft CW struct vt_mode { char mode; /* vt mode */ char waitv; /* if set, hang on writes if not active */ short relsig; /* signal to raise on release req */ short acqsig; /* signal to raise on acquisition */ short frsig; /* unused (set to 0) */ }; .ft .fi .RE which is set to the mode of the active vt. \fImode\fP is set to one of these values: .TS l l. VT_AUTO auto vt switching VT_PROCESS process controls switching VT_ACKACQ acknowledge switch .TE .IP \fBVT_SETMODE\fP Set mode of active vt. \fIargp\fP points to a \fIstruct vt_mode\fP. .IP \fBVT_GETSTATE\fP Get global vt state info. \fIargp\fP points to a .RS .nf .ft CW struct vt_stat { unsigned short v_active; /* active vt */ unsigned short v_signal; /* signal to send */ unsigned short v_state; /* vt bit mask */ }; .ft .fi .RE For each vt in use, the corresponding bit in the \fIv_state\fP member is set. (Kernels 1.0 through 1.1.92.) .IP \fBVT_RELDISP\fP Release a display. .IP \fBVT_ACTIVATE\fP Switch to vt \fIargp\fP (1 <= \fIargp\fP <= MAX_NR_CONSOLES). .IP \fBVT_WAITACTIVE\fP Wait until vt \fIargp\fP has been activated. .IP \fBVT_DISALLOCATE\fP Deallocate the memory associated with vt \fIargp\fP. (Since 1.1.54.) .IP \fBVT_RESIZE\fP Set the kernel's idea of screensize. \fIargp\fP points to a .RS .nf .ft CW struct vt_sizes { unsigned short v_rows; /* # rows */ unsigned short v_cols; /* # columns */ unsigned short v_scrollsize; /* no longer used */ }; .ft .fi .RE Note that this does not change the videomode. See .BR resizecons (8). (Since 1.1.54.) .IP \fBVT_RESIZEX\fP Set the kernel's idea of various screen parameters. 
\fIargp\fP points to a .RS .nf .ft CW struct vt_consize { unsigned short v_rows; /* number of rows */ unsigned short v_cols; /* number of columns */ unsigned short v_vlin; /* number of pixel rows on screen */ unsigned short v_clin; /* number of pixel rows per character */ unsigned short v_vcol; /* number of pixel columns on screen */ unsigned short v_ccol; /* number of pixel columns per character */ }; .ft .fi .RE Any parameter may be set to zero, indicating "no change", but if multiple parameters are set, they must be self-consistent. Note that this does not change the videomode. See .BR resizecons (8). (Since 1.3.3.) .PP The action of the following ioctls depends on the first byte in the struct pointed to by \fIargp\fP, referred to here as the \fIsubcode\fP. These are legal only for the superuser or the owner of the current tty. .IP "\fBTIOCLINUX, subcode=0\fP" Dump the screen. Disappeared in 1.1.92. (With kernel 1.1.92 or later, read from /dev/vcsN or /dev/vcsaN instead.) .IP "\fBTIOCLINUX, subcode=1\fP" Get task information. Disappeared in 1.1.92. .IP "\fBTIOCLINUX, subcode=2\fP" Set selection. \fIargp\fP points to a .RS .nf .ft CW struct { char subcode; short xs, ys, xe, ye; short sel_mode; }; .ft .fi .RE \fIxs\fP and \fIys\fP are the starting column and row. \fIxe\fP and \fIye\fP are the ending column and row. (Upper left corner is row=column=1.) \fIsel_mode\fP is 0 for character-by-character selection, 1 for word-by-word selection, or 2 for line-by-line selection. The indicated screen characters are highlighted and saved in the static array sel_buffer in devices/char/console.c. .IP "\fBTIOCLINUX, subcode=3\fP" Paste selection. The characters in the selection buffer are written to \fIfd\fP. .IP "\fBTIOCLINUX, subcode=4\fP" Unblank the screen. .IP "\fBTIOCLINUX, subcode=5\fP" Sets contents of a 256-bit look up table defining characters in a "word", for word-by-word selection. (Since 1.1.32.) .IP "\fBTIOCLINUX, subcode=6\fP" \fIargp\fP points to a char which is set to the value of the kernel variable \fIshift_state\fP. (Since 1.1.32.) .IP "\fBTIOCLINUX, subcode=7\fP" \fIargp\fP points to a char which is set to the value of the kernel variable \fIreport_mouse\fP. (Since 1.1.33.) .IP "\fBTIOCLINUX, subcode=8\fP" Dump screen width and height, cursor position, and all the character-attribute pairs. (Kernels 1.1.67 through 1.1.91 only. With kernel 1.1.92 or later, read from /dev/vcsa* instead.) .IP "\fBTIOCLINUX, subcode=9\fP" Restore screen width and height, cursor position, and all the character-attribute pairs. (Kernels 1.1.67 through 1.1.91 only. With kernel 1.1.92 or later, write to /dev/vcsa* instead.) .IP "\fBTIOCLINUX, subcode=10\fP" Handles the Power Saving feature of the new generation of monitors. VESA screen blanking mode is set to \fIargp\fP[1], which governs what screen blanking does: \fI0\fP: Screen blanking is disabled. \fI1\fP: The current video adapter register settings are saved, then the controller is programmed to turn off the vertical synchronization pulses. This puts the monitor into "standby" mode. If your monitor has an Off_Mode timer, then it will eventually power down by itself. \fI2\fP: The current settings are saved, then both the vertical and horizontal synchronization pulses are turned off. This puts the monitor into "off" mode. If your monitor has no Off_Mode timer, or if you want your monitor to power down immediately when the blank_timer times out, then you choose this option. (\fICaution:\fP Powering down frequently will damage the monitor.) 
(Since 1.1.76.) .SH "RETURN VALUE" On success, 0 is returned. On error \-1 is returned, and \fIerrno\fP is set. .SH ERRORS \fIerrno\fP may take on these values: .TP .B EBADF The file descriptor is invalid. .TP .B ENOTTY The file descriptor is not associated with a character special device, or the specified request does not apply to it. .TP .B EINVAL The file descriptor or \fIargp\fP is invalid. .TP .B EPERM Insufficient permission. .SH NOTES .BR Warning : Do not regard this man page as documentation of the Linux console ioctl's. This is provided for the curious only, as an alternative to reading the source. Ioctl's are undocumented Linux internals, liable to be changed without warning. (And indeed, this page more or less describes the situation as of kernel version 1.1.94; there are many minor and not-so-minor differences with earlier versions.) Very often, ioctl's are introduced for communication between the kernel and one particular well-known program (fdisk, hdparm, setserial, tunelp, loadkeys, selection, setfont, etc.), and their behavior will be changed when required by this particular program. Programs using these ioctl's will not be portable to other versions of UNIX, will not work on older versions of Linux, and will not work on future versions of Linux. Use POSIX functions. .SH "SEE ALSO" .BR dumpkeys (1), .BR kbd_mode (1), .BR loadkeys (1), .BR mknod (1), .BR setleds (1), .BR setmetamode (1), .BR execve (2), .BR fcntl (2), .BR ioperm (2), .BR termios (3), .BR console (4), .BR console_codes (4), .BR mt (4), .BR sd (4), .BR tty (4), .BR tty_ioctl (4), .BR ttyS (4), .BR vcs (4), .BR vcsa (4), .BR charsets (7), .BR mapscrn (8), .BR resizecons (8), .BR setfont (8), .IR /usr/include/linux/kd.h , .I /usr/include/linux/vt.h .SH COLOPHON This page is part of release 3.35 of the Linux .I man-pages project. A description of the project, and information about reporting bugs, can be found at http://man7.org/linux/man-pages/. doclifter-2.11/tests/basic.troff0000664000175000017500000000231112152465736015053 0ustar esresr.\" Test for various troff features .\" Test translation .tr $\(bu We should see a bullet literal $ here. .ds XX frozzle This is a \*(XX string expansion example. .\" Simple macro definition, no macro calls inside it .de YY Macro expansion text with argument \$1 .. .\" Here's the test invocation .YY foo .\" Let's be sure we keep getting lines after macroexpansion .de AA Before subcall .YY bar After subcall: \$1 .. .\" next line tests .so .so testinclude .\" OK, here's the two-level macroexpansion .AA baz .\" Test conditionals .de CO1 .if n 1: You should see this .\} .. .CO1 .de CO2 .ie n \{ 2: You should see this .\} .el\{ 2: You should not see this .\} .. .CO2 .pm .ie n 3: You should see this .el 3: You should not see this .\" Something is funky with the else handling .if n TRUE .el FALSE \" Test HTMLization .br Hi there .de Sh dummy .. .\" Do we blow our stack? .if n .Sh """Considerations""" .el .Sh "``Considerations''" .\" Example test .ft CW .in +4 .nf This is an example .ft .in -4 .fi foo\ bar .br This line tests recognition of \v'-.4m'\fIsuperscripting\*(ic\fR\v'.4m') This line tests recognition of the \uother\d superscript idiom. Before ignore .ig This line is ignored .. After ignore # End. 
doclifter-2.11/tests/groff_char.chk0000664000175000017500000046142612152465736015537 0ustar esresr ]> 01 April 2012 GROFF_CHAR 7 01 April 2012 Groff Version 1.21 groff_char groff glyph names DESCRIPTION This manual page lists the standard groff glyph names and the default input mapping, ­latin1. The glyphs in this document look different depending on which output device was chosen (with option for the man1 program or the roff formatter). Glyphs not available for the device that is being used to print or view this manual page are marked with `(N/A)'. In the actual version, groff provides only ­8-bit characters for direct input and named entities for further glyphs. On ASCII platforms, input character codes in the range 0 to 127 (decimal) represent the usual ­7-bit ASCII characters, while codes between 127 and 255 are interpreted as the corresponding characters in the ­latin1 (­ISO-8859-1) code set by default. This mapping is contained in the file latin1.tmac and can be changed by loading a different input encoding. Note that some of the input characters are reserved by groff, either for internal use or for special input purposes. On EBCDIC platforms, only code page cp1047 is supported (which contains the same characters as ­latin1; the input encoding file is called cp1047.tmac). Again, some input characters are reserved for internal and special purposes. All roff systems provide the concept of named glyphs. In traditional roff systems, only names of length 2 were used, while groff also provides support for longer names. It is strongly suggested that only named glyphs are used for all character representations outside of the printable ­7-bit ASCII range. Some of the predefined groff escape sequences (with names of length 1) also produce single glyphs; these exist for historical reasons or are printable versions of syntactical characters. They include `\\', `', `\`', `\-', `\.', and `\e'; see groff7. In groff, all of these different types of characters and glyphs can be tested positively with the `.if c' conditional. REFERENCE In this section, the glyphs in groff are specified in tabular form. The meaning of the columns is as follows. Output shows how the glyph is printed for the current device; although this can have quite a different shape on other devices, it always represents the same glyph. Input specifies how the glyph is input either directly by a key on the keyboard, or by a groff escape sequence. Code applies to glyphs which can be input with a single character, and gives the ISO ­latin1 decimal code of that input character. Note that this code is equivalent to the lowest 256 Unicode characters, including ­7-bit ASCII in the range 0 to 127. PostScript gives the usual PostScript name of the glyph. Unicode is the glyph name used in composite glyph names. 7-bit Character Codes 32-126 These are the basic glyphs having 7-bit ASCII code values assigned. They are identical to the printable characters of the character standards ­ISO-8859-1 (­latin1) and Unicode (range Basic Latin). The glyph names used in composite glyph names are `u0020' up to `u007E'. Note that input characters in the range ­0-31 and character 127 are not printable characters. Most of them are invalid input characters for groff anyway, and the valid ones have special meaning. For EBCDIC, the printable characters are in the range ­66-255. 48-57 Decimal digits 0 to 9 (print as themselves). 65-90 Upper case letters A-Z (print as themselves). 97-122 Lower case letters a-z (print as themselves). 
Most of the remaining characters not in the just described ranges print as themselves; the only exceptions are the following characters: ` the ISO ­latin1 `Grave Accent' (code 96) prints as `, a left single quotation mark; the original character can be obtained with `\`'. '' the ISO ­latin1 `Apostrophe' (code 39) prints as ', a right single quotation mark; the original character can be obtained with `\(aq'. - the ISO ­latin1 `Hyphen, Minus Sign' (code 45) prints as a hyphen; a minus sign can be obtained with `\-'. ~ the ISO ­latin1 `Tilde' (code 126) is reduced in size to be usable as a diacritic; a larger glyph can be obtained with `\(ti'. ^ the ISO ­latin1 `Circumflex Accent' (code 94) is reduced in size to be usable as a diacritic; a larger glyph can be obtained with `\(ha'. Output Input Code PostScript Unicode Notes ! ! 33 exclam u0021 " " 34 quotedbl u0022 # # 35 numbersign u0023 $ $ 36 dollar u0024 % % 37 percent u0025 & & 38 ampersand u0026 ' ' 39 quoteright u0027 ( ( 40 parenleft u0028 ) ) 41 parenright u0029 * * 42 asterisk u002A + + 43 plus u002B , , 44 comma u002C - - 45 hyphen u2010 . . 46 period u002E / / 47 slash u002F : : 58 colon u003A ; ; 59 semicolon u003B < < 60 less u003C = = 61 equal u003D > > 62 greater u003E ? ? 63 question u003F @ @ 64 at u0040 [ [ 91 bracketleft u005B \ \ 92 backslash u005C ] ] 93 bracketright u005D ^ ^ 94 circumflex u005E circumflex accent _ _ 95 underscore u005F ` ` 96 quoteleft u0060 { { 123 braceleft u007B | | 124 bar u007C } } 125 braceright u007D ~ ~ 126 tilde u007E tilde accent 8-bit Character Codes 160 to 255 They are interpreted as printable characters according to the latin1 (ISO-8859-1) code set, being identical to the Unicode range Latin-1 Supplement. Input characters in range 128-159 (on non-EBCDIC hosts) are not printable characters. 160 the ISO ­latin1 no-break space is mapped to `\~', the stretchable space character. 173 the soft hyphen control character. groff never uses this character for output (thus it is omitted in the table below); the input character 173 is mapped onto `\%'. The remaining ranges (­161-172, ­174-255) are printable characters that print as themselves. Although they can be specified directly with the keyboard on systems with a ­latin1 code page, it is better to use their glyph names; see next section. 
Output Input Code PostScript Unicode Notes ¡ ¡ 161 exclamdown u00A1 inverted exclamation mark ¢ ¢ 162 cent u00A2 £ £ 163 sterling u00A3 ¤ ¤ 164 currency u00A4 ¥ ¥ 165 yen u00A5 ¦ ¦ 166 brokenbar u00A6 § § 167 section u00A7 ¨ ¨ 168 dieresis u00A8 © © 169 copyright u00A9 ª ª 170 ordfeminine u00AA « « 171 guillemotleft u00AB ¬ ¬ 172 logicalnot u00AC ® ® 174 registered u00AE ¯ ¯ 175 macron u00AF ° ° 176 degree u00B0 ± ± 177 plusminus u00B1 ² ² 178 twosuperior u00B2 ³ ³ 179 threesuperior u00B3 ´ ´ 180 acute u00B4 acute accent µ µ 181 mu u00B5 micro sign 182 paragraph u00B6 · · 183 periodcentered u00B7 ¸ ¸ 184 cedilla u00B8 ¹ ¹ 185 onesuperior u00B9 º º 186 ordmasculine u00BA » » 187 guillemotright u00BB ¼ ¼ 188 onequarter u00BC ½ ½ 189 onehalf u00BD ¾ ¾ 190 threequarters u00BE ¿ ¿ 191 questiondown u00BF À À 192 Agrave u0041_0300 Á Á 193 Aacute u0041_0301   194 Acircumflex u0041_0302 à à 195 Atilde u0041_0303 Ä Ä 196 Adieresis u0041_0308 Å Å 197 Aring u0041_030A Æ Æ 198 AE u00C6 Ç Ç 199 Ccedilla u0043_0327 È È 200 Egrave u0045_0300 É É 201 Eacute u0045_0301 Ê Ê 202 Ecircumflex u0045_0302 Ë Ë 203 Edieresis u0045_0308 Ì Ì 204 Igrave u0049_0300 Í Í 205 Iacute u0049_0301 Î Î 206 Icircumflex u0049_0302 Ï Ï 207 Idieresis u0049_0308 Ð Ð 208 Eth u00D0 Ñ Ñ 209 Ntilde u004E_0303 Ò Ò 210 Ograve u004F_0300 Ó Ó 211 Oacute u004F_0301 Ô Ô 212 Ocircumflex u004F_0302 Õ Õ 213 Otilde u004F_0303 Ö Ö 214 Odieresis u004F_0308 × × 215 multiply u00D7 Ø Ø 216 Oslash u00D8 Ù Ù 217 Ugrave u0055_0300 Ú Ú 218 Uacute u0055_0301 Û Û 219 Ucircumflex u0055_0302 Ü Ü 220 Udieresis u0055_0308 Ý Ý 221 Yacute u0059_0301 Þ Þ 222 Thorn u00DE ß ß 223 germandbls u00DF à à 224 agrave u0061_0300 á á 225 aacute u0061_0301 â â 226 acircumflex u0061_0302 ã ã 227 atilde u0061_0303 ä ä 228 adieresis u0061_0308 å å 229 aring u0061_030A æ æ 230 ae u00E6 ç ç 231 ccedilla u0063_0327 è è 232 egrave u0065_0300 é é 233 eacute u0065_0301 ê ê 234 ecircumflex u0065_0302 ë ë 235 edieresis u0065_0308 ì ì 236 igrave u0069_0300 í í 237 iacute u0069_0301 î î 238 icircumflex u0069_0302 ï ï 239 idieresis u0069_0308 ð ð 240 eth u00F0 ñ ñ 241 ntilde u006E_0303 ò ò 242 ograve u006F_0300 ó ó 243 oacute u006F_0301 ô ô 244 ocircumflex u006F_0302 õ õ 245 otilde u006F_0303 ö ö 246 odieresis u006F_0308 ÷ ÷ 247 divide u00F7 ø ø 248 oslash u00F8 ù ù 249 ugrave u0075_0300 ú ú 250 uacute u0075_0301 û û 251 ucircumflex u0075_0302 ü ü 252 udieresis u0075_0308 ý ý 253 yacute u0079_0301 þ þ 254 thorn u00FE ÿ ÿ 255 ydieresis u0079_0308 Named Glyphs Glyph names can be embedded into the document text by using escape sequences. groff7 describes how these escape sequences look. Glyph names can consist of quite arbitrary characters from the ASCII or ­latin1 code set, not only alphanumeric characters. Here some examples: \(ch A glyph having the 2-character name ch. \[char_name] A glyph having the name char_name (having length 1, 2, 3, . . .). Note that `c' is not the same as `\[c]' (c a single character): The latter is internally mapped to glyph name `\c'. By default, groff defines a single glyph name starting with a backslash, namely ­`\-', which can be either accessed as `\-' or `\[-]'. \[base_glyph composite_1 composite_2 . . .] A composite glyph; see below for a more detailed description. In groff, each ­8-bit input character can also referred to by the construct `\[charn]' where n is the decimal code of the character, a number between 0 and 255 without leading zeros (those entities are not glyph names). They are normally mapped onto glyphs using the .trin request. 
Another special convention is the handling of glyphs with names directly derived from a Unicode code point; this is discussed below. Moreover, new glyph names can be created by the .char request; see groff7. In the following, a plus sign in the `Notes' column indicates that this particular glyph name appears in the PS version of the original troff documentation, CSTR 54. Entries marked with `***' denote glyphs for mathematical purposes (mainly used for DVI output). Normally, such glyphs have metrics which make them unusable in normal text. Output Input PostScript Unicode Notes Ð \[-D] Eth u00D0 uppercase eth ð \[Sd] eth u00F0 lowercase eth Þ \[TP] Thorn u00DE uppercase thorn þ \[Tp] thorn u00FE lowercase thorn ß \[ss] germandbls u00DF German sharp s Ligatures and Other Latin Glyphs Output Input PostScript Unicode Notes \[ff] ff u0066_0066 ff ligature + \[fi] fi u0066_0069 fi ligature + \[fl] fl u0066_006C fl ligature + \[Fi] ffi u0066_0066_0069 ffi ligature + \[Fl] ffl u0066_0066_006C ffl ligature + Ł \[/L] Lslash u0141 (Polish) ł \[/l] lslash u0142 (Polish) Ø \[/O] Oslash u00D8 (Scandinavian) ø \[/o] oslash u00F8 (Scandinavian) Æ \[AE] AE u00C6 æ \[ae] ae u00E6 Œ \[OE] OE u0152 œ \[oe] oe u0153 ij \[IJ] IJ u0132 (Dutch) IJ \[ij] ij u0133 (Dutch) ı \[.i] dotlessi u0131 (Turkish) &jnodot; \[.j] dotlessj --- j without a dot Accented Characters Output Input PostScript Unicode Notes Á \['A] Aacute u0041_0301 Ć \['C] Cacute u0043_0301 É \['E] Eacute u0045_0301 Í \['I] Iacute u0049_0301 Ó \['O] Oacute u004F_0301 Ú \['U] Uacute u0055_0301 Ý \['Y] Yacute u0059_0301 á \['a] aacute u0061_0301 ć \['c] cacute u0063_0301 é \['e] eacute u0065_0301 í \['i] iacute u0069_0301 ó \['o] oacute u006F_0301 ú \['u] uacute u0075_0301 ý \['y] yacute u0079_0301 Ä \[:A] Adieresis u0041_0308 A with umlaut Ë \[:E] Edieresis u0045_0308 Ï \[:I] Idieresis u0049_0308 Ö \[:O] Odieresis u004F_0308 Ü \[:U] Udieresis u0055_0308 Ÿ \[:Y] Ydieresis u0059_0308 ä \[:a] adieresis u0061_0308 ë \[:e] edieresis u0065_0308 ï \[:i] idieresis u0069_0308 ö \[:o] odieresis u006F_0308 ü \[:u] udieresis u0075_0308 ÿ \[:y] ydieresis u0079_0308  \[^A] Acircumflex u0041_0302 Ê \[^E] Ecircumflex u0045_0302 Î \[^I] Icircumflex u0049_0302 Ô \[^O] Ocircumflex u004F_0302 Û \[^U] Ucircumflex u0055_0302 â \[^a] acircumflex u0061_0302 ê \[^e] ecircumflex u0065_0302 î \[^i] icircumflex u0069_0302 ô \[^o] ocircumflex u006F_0302 û \[^u] ucircumflex u0075_0302 À \[`A] Agrave u0041_0300 È \[`E] Egrave u0045_0300 Ì \[`I] Igrave u0049_0300 Ò \[`O] Ograve u004F_0300 Ù \[`U] Ugrave u0055_0300 à \[`a] agrave u0061_0300 è \[`e] egrave u0065_0300 ì \[`i] igrave u0069_0300 ò \[`o] ograve u006F_0300 ù \[`u] ugrave u0075_0300 à \[~A] Atilde u0041_0303 Ñ \[~N] Ntilde u004E_0303 Õ \[~O] Otilde u004F_0303 ã \[~a] atilde u0061_0303 ñ \[~n] ntilde u006E_0303 õ \[~o] otilde u006F_0303 Š \[vS] Scaron u0053_030C š \[vs] scaron u0073_030C Ž \[vZ] Zcaron u005A_030C ž \[vz] zcaron u007A_030C Ç \[,C] Ccedilla u0043_0327 ç \[,c] ccedilla u0063_0327 Å \[oA] Aring u0041_030A å \[oa] aring u0061_030A Accents The composite request is used to map most of the accents to non-spacing glyph names; the values given in parentheses are the original (spacing) ones. Output Input PostScript Unicode Notes ˝ \[a"] hungarumlaut u030B (u02DD) (Hungarian) ¯ \[a-] macron u0304 (u00AF) ˙ \[a.] 
dotaccent u0307 (u02D9) ˆ \[a^] circumfle u0302 (u005E) ´ \[aa] acute u0301 (u00B4) + ` \[ga] grave u0300 (u0060) + ˘ \[ab] breve u0306 (u02D8) ¸ \[ac] cedilla u0327 (u00B8) ¨ \[ad] dieresis u0308 (u00A8) umlaut ˇ \[ah] caron u030C (u02C7) ˚ \[ao] ring u030A (u02DA) circle ˜ \[a~] tilde u0303 (u007E) ˛ \[ho] ogonek u0328 (u02DB) hook ^ \[ha] asciicircum u005E (spacing) ~ \[ti] asciitilde u007E (spacing) Quotes Output Input PostScript Unicode Notes \[Bq] quotedblbase u201E low double comma quote \[bq] quotesinglbase u201A low single comma quote \[lq] quotedblleft u201C \[rq] quotedblright u201D \[oq] quoteleft u2018 single open quote \[cq] quoteright u2019 single closing quote '' \[aq] quotesingle u0027 apostrophe quote (ASCII 39) " \[dq] quotedbl u0022 double quote (ASCII 34) « \[Fo] guillemotleft u00AB » \[Fc] guillemotright u00BB &fo; \[fo] guilsinglleft u2039 &fc; \[fc] guilsinglright u203A Punctuation Output Input PostScript Unicode Notes ¡ \[r!] exclamdown u00A1 ¿ \[r?] questiondown u00BF \[em] emdash u2014 + \[en] endash u2013 \[hy] hyphen u2010 + Brackets The extensible bracket pieces are font-invariant glyphs. In classical troff only one glyph was available to vertically extend brackets, braces, and parentheses: `bv'. We map it rather arbitrarily to u23AA. Note that not all devices contain extensible bracket pieces which can be piled up with `\b' due to the restrictions of the escape's piling algorithm. A general solution to build brackets out of pieces is the following macro: .\" Make a pile centered vertically 0.5em .\" above the baseline. .\" The first argument is placed at the top. .\" The pile is returned in string `pile' .eo .de pile-make . nr pile-wd 0 . nr pile-ht 0 . ds pile-args . . nr pile-# \n[.$] . while \n[pile-#] \{\ . nr pile-wd (\n[pile-wd] >? \w'\$[\n[pile-#]]') . nr pile-ht +(\n[rst] - \n[rsb]) . as pile-args \v'\n[rsb]u'\" . as pile-args \Z'\$[\n[pile-#]]'\" . as pile-args \v'-\n[rst]u'\" . nr pile-# -1 . \} . . ds pile \v'(-0.5m + (\n[pile-ht]u / 2u))'\" . as pile \*[pile-args]\" . as pile \v'((\n[pile-ht]u / 2u) + 0.5m)'\" . as pile \h'\n[pile-wd]u'\" .. .ec Another complication is the fact that some glyphs which represent bracket pieces in original troff can be used for other mathematical symbols also, for example `lf' and `rf' which provide the `floor' operator. Other devices (most notably for DVI output) don't unify such glyphs. For this reason, the four glyphs `lf', `rf', `lc', and `rc' are not unified with similarly looking bracket pieces. In groff, only glyphs with long names are guaranteed to pile up correctly for all devices (provided those glyphs exist). 
Output Input PostScript Unicode Notes [ \[lB] bracketleft u005B ] \[rB] bracketright u005D { \[lC] braceleft u007B } \[rC] braceright u007D \[la] angleleft u27E8 left angle bracket \[ra] angleright u27E9 right angle bracket | \[bv] braceex u23AA vertical extension *** + \[braceex] braceex u23AA \[bracketlefttp] bracketlefttp u23A1 \[bracketleftbt] bracketleftbt u23A3 \[bracketleftex] bracketleftex u23A2 \[bracketrighttp] bracketrighttp u23A4 \[bracketrightbt] bracketrightbt u23A6 \[bracketrightex] bracketrightex u23A5 &tlt; \[lt] bracelefttp u23A7 + \[bracelefttp] bracelefttp u23A7 &lk; \[lk] braceleftmid u23A8 + \[braceleftmid] braceleftmid u23A8 &lb; \[lb] braceleftbt u23A9 + \[braceleftbt] braceleftbt u23A9 \[braceleftex] braceleftex u23AA &rt; \[rt] bracerighttp u23AB + \[bracerighttp] bracerighttp u23AB &rk; \[rk] bracerightmid u23AC + &bracerightmid; \[bracerightmid] bracerightmid u23AC &rb; \[rb] bracerightbt u23AD + &rb; \[bracerightbt] bracerightbt u23AD &bracerightex; \[bracerightex] bracerightex u23AA &parenlefttp; \[parenlefttp] parenlefttp u239B &parenleftbt; \[parenleftbt] parenleftbt u239D &parenleftex; \[parenleftex] parenleftex u239C &parenrighttp; \[parenrighttp] parenrighttp u239E &parenrightbt; \[parenrightbt] parenrightbt u23A0 &parenrightex; \[parenrightex] parenrightex u239F Arrows Output Input PostScript Unicode Notes \[<-] arrowleft u2190 + \[->] arrowright u2192 + \[<>] arrowboth u2194 (horizontal) \[da] arrowdown u2193 + \[ua] arrowup u2191 + \[va] arrowupdn u2195 \[lA] arrowdblleft u21D0 \[rA] arrowdblright u21D2 \[hA] arrowdblboth u21D4 (horizontal) \[dA] arrowdbldown u21D3 \[uA] arrowdblup u21D1 \[vA] uni21D5 u21D5 vertical double-headed double arrow &an; \[an] arrowhorizex u23AF horizontal arrow extension Lines The font-invariant glyphs `br', `ul', and `rn' form corners; they can be used to build boxes. Note that both the PostScript and the Unicode-derived names of these three glyphs are just rough approximations. `rn' also serves in classical troff as the horizontal extension of the square root sign. `ru' is a font-invariant glyph, namely a rule of length 0.5m. Output Input PostScript Unicode Notes | \[ba] bar u007C \[br] SF110000 u2502 box rule + _ \[ul] underscore u005F + ¯ \[rn] overline u203E + _ \[ru] --- --- baseline rule + ¦ \[bb] brokenbar u00A6 / \[sl] slash u002F + \ \[rs] backslash u005C reverse solidus Use `\[radicalex]', not `\[overline]', for continuation of square root Text markers Output Input PostScript Unicode Notes \[ci] circle u25CB + \[bu] bullet u2022 + \[dd] daggerdbl u2021 double dagger sign + \[dg] dagger u2020 + \[lz] lozenge u25CA \[sq] uni25A1 u25A1 white square + \[ps] paragraph u00B6 § \[sc] section u00A7 + &lh; \[lh] uni261C u261C hand pointing left + &rh; \[rh] a14 u261E hand pointing right + @ \[at] at u0040 # \[sh] numbersign u0023 &CR; \[CR] carriagereturn u21B5 \[OK] a19 u2713 check mark, tick Legal Symbols Output Input PostScript Unicode Notes © \[co] copyright u00A9 + \[rg] registered u00AE + \[tm] trademark u2122 \[bs] --- --- AT&T Bell Labs logo + The Bell Labs logo is not supported in groff. 
Currency symbols Output Input PostScript Unicode Notes $ \[Do] dollar u0024 ¢ \[ct] cent u00A2 + \[eu] --- u20AC official Euro symbol \[Eu] Euro u20AC font-specific Euro glyph variant ¥ \[Ye] yen u00A5 £ \[Po] sterling u00A3 British currency sign ¤ \[Cs] currency u00A4 Scandinavian currency sign ƒ \[Fn] florin u0192 Dutch currency sign Units Output Input PostScript Unicode Notes ° \[de] degree u00B0 + \[%0] perthousand u2030 per thousand, per mille sign \[fm] minute u2032 footmark, prime + \[sd] second u2033 µ \[mc] mu u00B5 micro sign ª \[Of] ordfeminine u00AA º \[Om] ordmasculine u00BA Logical Symbols Output Input PostScript Unicode Notes \[AN] logicaland u2227 \[OR] logicalor u2228 ¬ \[no] logicalnot u00AC + ¬ \[tno] logicalnot u00AC text variant of `no' \[te] existential u2203 there exists \[fa] universal u2200 for all ϶ \[st] suchthat u220B \[3d] therefore u2234 \[tf] therefore u2234 Mathematical Symbols Output Input PostScript Unicode Notes ½ \[12] onehalf u00BD "+" ¼ \[14] onequarter u00BC "+" ¾ \[34] threequarters u00BE "+" \[18] oneeighth u215B \[38] threeeighths u215C \[58] fiveeighths u215D \[78] seveneighths u215E ¹ \[S1] onesuperior u00B9 ² \[S2] twosuperior u00B2 ³ \[S3] threesuperior u00B3 + \[pl] plus u002B plus in special font + \[mi] minus u2212 minus in special font + \[-+] uni2213 u2213 ± \[+-] plusminus u00B1 + ± \[t+-] plusminus u00B1 text variant of `+-' · \[pc] periodcentered u00B7 · \[md] dotmath u22C5 multiplication dot × \[mu] multiply u00D7 + \[tmu] multiply u00D7 text variant of `mu' \[c*] circlemultiply u2297 multiply sign in a circle \[c+] circleplus u2295 plus in a circle ÷ \[di] divide u00F7 division + ÷ \[tdi] divide u00F7 text variant of `di' \[f/] fraction u2044 bar for fractions \[**] asteriskmath u2217 + \[<=] lessequal u2264 + \[>=] greaterequal u2265 + \[<<] uni226A u226A much less \[>>] uni226B u226B much greater = \[eq] equal u003D equals in special font + \[!=] notequal u003D_0338 + \[==] equivalence u2261 + \[ne] uni2262 u2261_0338 \[=~] congruent u2245 approx. equal \[|=] uni2243 u2243 asymptot. equal to + \[ap] similar u223C + \[~~] approxequal u2248 almost equal to \[~=] approxequal u2248 \[pt] proportional u221D + \[es] emptyset u2205 + \[mo] element u2208 + \[nm] notelement u2208_0338 \[sb] propersubset u2282 + \[nb] notsubset u2282_0338 \[sp] propersuperset u2283 + \[nc] uni2285 u2283_0338 not superset \[ib] reflexsubset u2286 + \[ip] reflexsuperset u2287 + \[ca] intersection u2229 intersection, cap + \[cu] union u222A union, cup + \[/_] angle u2220 \[pp] perpendicular u22A5 \[is] integral u222B + \[integral] integral u222B *** \[sum] summation u2211 *** \[product] product u220F *** \[coproduct] uni2210 u2210 *** \[gr] gradient u2207 + \[sr] radical u221A square root + \[sqrt] radical u221A *** &radicalex; \[radicalex] radicalex --- square root continuation &sqrtex; \[sqrtex] radicalex --- *** &lc; \[lc] uni2308 u2308 left ceiling + &rc; \[rc] uni2309 u2309 right ceiling + &lf; \[lf] uni230A u230A left floor + &rf; \[rf] uni230B u230B right floor + \[if] infinity u221E + \[Ah] aleph u2135 \[Im] Ifraktur u2111 Gothic I, imaginary \[Re] Rfraktur u211C Gothic R, real \[wp] weierstrass u2118 Weierstrass p \[pd] partialdiff u2202 partial differentiation + \[-h] uni210F u210F Planck constant / 2pi \[hbar] uni210F u210F Greek glyphs These glyphs are intended for technical use, not for real Greek; normally, the uppercase letters have upright shape, and the lowercase ones are slanted. There is a problem with the mapping of letter phi to Unicode. 
Prior to Unicode version 3.0, the difference between U+03C6, GREEK SMALL LETTER PHI, and U+03D5, GREEK PHI SYMBOL, was not clearly described; only the glyph shapes in the Unicode book could be used as a reference. Starting with Unicode 3.0, the reference glyphs have been exchanged and described verbally also: In mathematical context, U+03D5 is the stroked variant and U+03C6 the curly glyph. Unfortunately, most font vendors didn't update their fonts to this (incompatible) change in Unicode. At the time of this writing (January 2006), it is not clear yet whether the Adobe Glyph Names `phi' and `phi1' also change its meaning if used for mathematics, thus compatibility problems are likely to happen – being conservative, groff currently assumes that `phi' in a PostScript symbol font is the stroked version. In groff, symbol `\[*f]' always denotes the stroked version of phi, and `\[+f]' the curly variant. Output Input PostScript Unicode Notes &Agr; \[*A] Alpha u0391 + &Bgr; \[*B] Beta u0392 + &Ggr; \[*G] Gamma u0393 + &Dgr; \[*D] Delta u0394 + &Egr; \[*E] Epsilon u0395 + &Zgr; \[*Z] Zeta u0396 + &EEgr; \[*Y] Eta u0397 + &THgr; \[*H] Theta u0398 + &Igr; \[*I] Iota u0399 + &Kgr; \[*K] Kappa u039A + &Lgr; \[*L] Lambda u039B + &Mgr; \[*M] Mu u039C + &Ngr; \[*N] Nu u039D + Ξ \[*C] Xi u039E + &Ogr; \[*O] Omicron u039F + &Pgr; \[*P] Pi u03A0 + &Rgr; \[*R] Rho u03A1 + &Sgr; \[*S] Sigma u03A3 + &Tgr; \[*T] Tau u03A4 + &Ugr; \[*U] Upsilon u03A5 + &PHgr; \[*F] Phi u03A6 + &KHgr; \[*X] Chi u03A7 + &PSgr; \[*Q] Psi u03A8 + &OHgr; \[*W] Omega u03A9 + &agr; \[*a] alpha u03B1 + &bgr; \[*b] beta u03B2 + &ggr; \[*g] gamma u03B3 + &dgr; \[*d] delta u03B4 + &egr; \[*e] epsilon u03B5 + &zgr; \[*z] zeta u03B6 + &eegr; \[*y] eta u03B7 + &thgr; \[*h] theta u03B8 + &igr; \[*i] iota u03B9 + &kgr; \[*k] kappa u03BA + &lgr; \[*l] lambda u03BB + &mgr; \[*m] mu u03BC + &ngr; \[*n] nu u03BD + ξ \[*c] xi u03BE + &ogr; \[*o] omicron u03BF + &pgr; \[*p] pi u03C0 + &rgr; \[*r] rho u03C1 + &sfgr; \[ts] sigma1 u03C2 terminal sigma + &sgr; \[*s] sigma u03C3 + &tgr; \[*t] tau u03C4 + &ugr; \[*u] upsilon u03C5 + &phgr; \[*f] phi u03D5 (stroked glyph) + &khgr; \[*x] chi u03C7 + &psgr; \[*q] psi u03C8 + &ohgr; \[*w] omega u03C9 + &b.thetas; \[+h] theta1 u03D1 variant theta &b.phiv; \[+f] phi1 u03C6 variant phi (curly shape) &b.omega; \[+p] omega1 u03D6 variant pi, looking like omega &b.epsiv; \[+e] uni03F5 u03F5 variant epsilon Card symbols Output Input PostScript Unicode Notes \[CL] club u2663 black club suit \[SP] spade u2660 black spade suit \[HE] heart u2665 black heart suit \[u2661] uni2661 u2661 white heart suit \[DI] diamond u2666 black diamond suit \[u2662] uni2662 u2662 white diamond suit AUTHOR Copyright © 1989-2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009 Free Software Foundation, Inc. This document is distributed under the terms of the FDL (GNU Free Documentation License) version 1.3 or later. You should have received a copy of the FDL on your system, it is also available on-line at the GNU copyleft site This document is part of groff, the GNU roff distribution. It was written by James Clark with additions by Werner Lemberg and Bernd Warken SEE ALSO groff1 the GNU roff formatter groff7 a short reference of the groff formatting language An extension to the troff character set for Europe, E.G. Keizer, K.J. Simonsen, J. Akkerhuis; EUUG Newsletter, Volume 9, No. 
2, Summer 1989 The Unicode Standard doclifter-2.11/tests/pax.chk0000664000175000017500000054365312152465736014222 0ustar esresr 2003 PAX P 2003 IEEE/The Open Group POSIX Programmer's Manual pax portable archive interchange pax -cdnv -H -L -f archive -s replstr pattern pax -r -cdiknuv -H -L -f archive -o options -p string -s replstr pattern pax -w -dituvX -H -L -b blksize -a -f archive -o options -s replstr -x format file pax -r -w -diklntuvX -H -L -p string -s replstr file directory DESCRIPTION The pax utility shall read, write, and write lists of the members of archive files and copy directory hierarchies. A variety of archive formats shall be supported; see the -x format option. The action to be taken depends on the presence of the -r and -w options. The four combinations of -r and -w are referred to as the four modes of operation: list, read, write, and copy modes, corresponding respectively to the four forms shown in the SYNOPSIS section. list In list mode (when neither -r nor -w is specified), pax shall write the names of the members of the archive file read from the standard input, with pathnames matching the specified patterns, to standard output. If a named file is of type directory, the file hierarchy rooted at that file shall be listed as well. read In read mode (when -r is specified, but -w is not), pax shall extract the members of the archive file read from the standard input, with pathnames matching the specified patterns. If an extracted file is of type directory, the file hierarchy rooted at that file shall be extracted as well. The extracted files shall be created performing pathname resolution with the directory in which pax was invoked as the current working directory. If an attempt is made to extract a directory when the directory already exists, this shall not be considered an error. If an attempt is made to extract a FIFO when the FIFO already exists, this shall not be considered an error. The ownership, access, and modification times, and file mode of the restored files are discussed under the -p option. write In write mode (when -w is specified, but -r is not), pax shall write the contents of the file operands to the standard output in an archive format. If no file operands are specified, a list of files to copy, one per line, shall be read from the standard input. A file of type directory shall include all of the files in the file hierarchy rooted at the file. copy In copy mode (when both -r and -w are specified), pax shall copy the file operands to the destination directory. If no file operands are specified, a list of files to copy, one per line, shall be read from the standard input. A file of type directory shall include all of the files in the file hierarchy rooted at the file. The effect of the copy shall be as if the copied files were written to an archive file and then subsequently extracted, except that there may be hard links between the original and the copied files. If the destination directory is a subdirectory of one of the files to be copied, the results are unspecified. If the destination directory is a file of a type not defined by the System Interfaces volume of IEEE Std 1003.1-2001, the results are implementation-defined; otherwise, it shall be an error for the file named by the directory operand not to exist, not be writable by the user, or not be a file of type directory.
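The four-way mode decision above reduces to a simple test on whether -r and -w were given; the following Python sketch (a hypothetical helper, not part of pax or doclifter) merely restates that mapping:

def pax_mode(r_given, w_given):
    # Neither -r nor -w: list the members of the archive on standard input.
    if not r_given and not w_given:
        return "list"
    # -r alone: extract (read) members from the archive on standard input.
    if r_given and not w_given:
        return "read"
    # -w alone: write the file operands to an archive on standard output.
    if w_given and not r_given:
        return "write"
    # Both -r and -w: copy the file operands into the destination directory.
    return "copy"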
In read or copy modes, if intermediate directories are necessary to extract an archive member, pax shall perform actions equivalent to the mkdir() function defined in the System Interfaces volume of IEEE Std 1003.1-2001, called with the following arguments: * The intermediate directory used as the path argument * The value of the bitwise-inclusive OR of S_IRWXU, S_IRWXG, and S_IRWXO as the mode argument If any specified pattern or file operands are not matched by at least one file or archive member, pax shall write a diagnostic message to standard error for each one that did not match and exit with a non-zero exit status. The archive formats described in the EXTENDED DESCRIPTION section shall be automatically detected on input. The default output archive format shall be implementation-defined. A single archive can span multiple files. The pax utility shall determine, in an implementation-defined manner, what file to read or write as the next file. If the selected archive format supports the specification of linked files, it shall be an error if these files cannot be linked when the archive is extracted. For archive formats that do not store file contents with each name that causes a hard link, if the file that contains the data is not extracted during this pax session, either the data shall be restored from the original file, or a diagnostic message shall be displayed with the name of a file that can be used to extract the data. In traversing directories, pax shall detect infinite loops; that is, entering a previously visited directory that is an ancestor of the last file visited. When it detects an infinite loop, pax shall write a diagnostic message to standard error and shall terminate. OPTIONS The pax utility shall conform to the Base Definitions volume of IEEE Std 1003.1-2001, Section 12.2, Utility Syntax Guidelines, except that the order of presentation of the , , and options is significant. The following options shall be supported: Read an archive file from standard input. Write files to the standard output in the specified archive format. Append files to the end of the archive. It is implementation-defined which devices on the system support appending. Additional file formats unspecified by this volume of IEEE Std 1003.1-2001 may impose restrictions on appending. blocksize Block the output at a positive decimal integer number of bytes per write to the archive file. Devices and archive formats may impose restrictions on blocking. Blocking shall be automatically determined on input. Conforming applications shall not specify a blocksize value larger than 32256. Default blocking when creating archives depends on the archive format. (See the option below.) Match all file or archive members except those specified by the pattern or file operands. Cause files of type directory being copied or archived or archive members of type directory being extracted or listed to match only the file or archive member itself and not the file hierarchy rooted at the file. archive Specify the pathname of the input or output archive, overriding the default standard input (in list or read modes) or standard output ( write mode). If a symbolic link referencing a file of type directory is specified on the command line, pax shall archive the file hierarchy rooted in the file referenced by the link, using the name of the link as the root of the file hierarchy. 
Otherwise, if a symbolic link referencing a file of any other file type which pax can normally archive is specified on the command line, then pax shall archive the file referenced by the link, using the name of the link. The default behavior shall be to archive the symbolic link itself. Interactively rename files or archive members. For each archive member matching a pattern operand or file matching a file operand, a prompt shall be written to the file /dev/tty. The prompt shall contain the name of the file or archive member, but the format is otherwise unspecified. A line shall then be read from /dev/tty. If this line is blank, the file or archive member shall be skipped. If this line consists of a single period, the file or archive member shall be processed with no modification to its name. Otherwise, its name shall be replaced with the contents of the line. The pax utility shall immediately exit with a non-zero exit status if end-of-file is encountered when reading a response or if /dev/tty cannot be opened for reading and writing. The results of extracting a hard link to a file that has been renamed during extraction are unspecified. Prevent the overwriting of existing files. (The letter ell.) In copy mode, hard links shall be made between the source and destination file hierarchies whenever possible. If specified in conjunction with or , when a symbolic link is encountered, the hard link created in the destination file hierarchy shall be to the file referenced by the symbolic link. If specified when neither nor is specified, when a symbolic link is encountered, the implementation shall create a hard link to the symbolic link in the source file hierarchy or copy the symbolic link to the destination. If a symbolic link referencing a file of type directory is specified on the command line or encountered during the traversal of a file hierarchy, pax shall archive the file hierarchy rooted in the file referenced by the link, using the name of the link as the root of the file hierarchy. Otherwise, if a symbolic link referencing a file of any other file type which pax can normally archive is specified on the command line or encountered during the traversal of a file hierarchy, pax shall archive the file referenced by the link, using the name of the link. The default behavior shall be to archive the symbolic link itself. Select the first archive member that matches each pattern operand. No more than one archive member shall be matched for each pattern (although members of type directory shall still match the file hierarchy rooted at that file). options Provide information to the implementation to modify the algorithm for extracting or writing files. The value of options shall consist of one or more comma-separated keywords of the form: keyword[[:]=value][,keyword[[:]=value], ...] Some keywords apply only to certain file formats, as indicated with each description. Use of keywords that are inapplicable to the file format being processed produces undefined results. Keywords in the options argument shall be a string that would be a valid portable filename as described in the Base Definitions volume of IEEE Std 1003.1-2001, Section 3.276, Portable Filename Character Set. Note:

Keywords are not expected to be filenames, merely to follow the same character composition rules as portable filenames.
Keywords can be preceded with white space. The value field shall consist of zero or more characters; within value, the application shall precede any literal comma with a backslash, which shall be ignored, but preserves the comma as part of value. A comma as the final character, or a comma followed solely by white space as the final characters, in options shall be ignored. Multiple options can be specified; if keywords given to these multiple options conflict, the keywords and values appearing later in command line sequence shall take precedence and the earlier shall be silently ignored. The following keyword values of options shall be supported for the file formats as indicated: delete=pattern
(Applicable only to the pax format.) When used in write or copy mode, pax shall omit from extended header records that it produces any keywords matching the string pattern. When used in read or list mode, pax shall ignore any keywords matching the string pattern in the extended header records. In both cases, matching shall be performed using the pattern matching notation described in Patterns Matching a Single Character and Patterns Matching Multiple Characters . For example: delete=security.* would suppress security-related information. See pax Extended Header for extended header record keyword usage.
exthdr.name=string
(Applicable only to the pax format.) This keyword allows user control over the name that is written into the ustar header blocks for the extended header produced under the circumstances described in pax Header Block . The name shall be the contents of string, after the following character substitutions have been made: string   Includes: Replaced By: %d The directory name of the file, equivalent to the result of the dirname utility on the translated pathname. %f The filename of the file, equivalent to the result of the basename utility on the translated pathname. %p The process ID of the pax process. %% A '%' character. Any other '%' characters in string produce undefined results. If no exthdr.name= string is specified, pax shall use the following default value: %d/PaxHeaders.%p/%f
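As a rough illustration of the %d, %f, %p, and %% substitutions just described, the expansion could be modelled as below; the helper name is invented for this sketch, and real pax performs the substitution internally on the translated pathname:

import os, re

def expand_exthdr_name(template, translated_path):
    # %d -> directory name (as the dirname utility would report it),
    # %f -> filename (as basename would report it), %p -> pax process ID,
    # %% -> a literal '%'.  Other '%' sequences are left alone (undefined).
    subs = {
        "d": os.path.dirname(translated_path) or ".",
        "f": os.path.basename(translated_path),
        "p": str(os.getpid()),
        "%": "%",
    }
    return re.sub(r"%(.)", lambda m: subs.get(m.group(1), m.group(0)), template)

# Expanding the default value for a member named src/util.c:
# expand_exthdr_name("%d/PaxHeaders.%p/%f", "src/util.c")
#   -> "src/PaxHeaders.<pid>/util.c"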
globexthdr.name=string
(Applicable only to the pax format.) When used in write or copy mode with the appropriate options, pax shall create global extended header records with ustar header blocks that will be treated as regular files by previous versions of pax. This keyword allows user control over the name that is written into the ustar header blocks for global extended header records. The name shall be the contents of string, after the following character substitutions have been made: string   Includes: Replaced By: %n An integer that represents the sequence number of the global extended header record in the archive, starting at 1. %p The process ID of the pax process. %% A '%' character. Any other '%' characters in string produce undefined results. If no globexthdr.name= string is specified, pax shall use the following default value: $TMPDIR/GlobalHead.%p.%n where $ TMPDIR represents the value of the TMPDIR environment variable. If TMPDIR is not set, pax shall use /tmp.
invalid=action
(Applicable only to the pax format.) This keyword allows user control over the action pax takes upon encountering values in an extended header record that, in read or copy mode, are invalid in the destination hierarchy or, in list mode, cannot be written in the codeset and current locale of the implementation. The following are invalid values that shall be recognized by pax: * In read or copy mode, a filename or link name that contains character encodings invalid in the destination hierarchy. (For example, the name may contain embedded NULs.) * In read or copy mode, a filename or link name that is longer than the maximum allowed in the destination hierarchy (for either a pathname component or the entire pathname). * In list mode, any character string value (filename, link name, user name, and so on) that cannot be written in the codeset and current locale of the implementation. The following mutually-exclusive values of the action argument are supported: bypass
In read or copy mode, pax shall bypass the file, causing no change to the destination hierarchy. In list mode, pax shall write all requested valid values for the file, but its method for writing invalid values is unspecified.
rename
In read or copy mode, pax shall act as if the option were in effect for each file with invalid filename or link name values, allowing the user to provide a replacement name interactively. In list mode, pax shall behave identically to the bypass action.
UTF-8
When used in read, copy, or list mode and a filename, link name, owner name, or any other field in an extended header record cannot be translated from the pax UTF-8 codeset format to the codeset and current locale of the implementation, pax shall use the actual UTF-8 encoding for the name.
write
In read or copy mode, pax shall write the file, translating or truncating the name, regardless of whether this may overwrite an existing file with a valid name. In list mode, pax shall behave identically to the bypass action.
If no invalid= option is specified, pax shall act as if invalid=bypass were specified. Any overwriting of existing files that may be allowed by the invalid= actions shall be subject to permission (-p) and modification time (-u) restrictions, and shall be suppressed if the -k option is also specified.
linkdata
(Applicable only to the pax format.) In write mode, pax shall write the contents of a file to the archive even when that file is merely a hard link to a file whose contents have already been written to the archive.
listopt=format
This keyword specifies the output format of the table of contents produced when the -v option is specified in list mode. See List Mode Format Specifications. To avoid ambiguity, the listopt=format shall be the only or final keyword=value pair in a -o option-argument; all characters in the remainder of the option-argument shall be considered part of the format string. When multiple -o listopt=format options are specified, the format strings shall be considered a single, concatenated string, evaluated in command line order.
times
(Applicable only to the pax format.) When used in write or copy mode, pax shall include atime, ctime, and mtime extended header records for each file. See pax Extended Header File Times .
In addition to these keywords, if the pax format is specified, any of the keywords and values defined in pax Extended Header, including implementation extensions, can be used in -o option-arguments, in either of two modes:
keyword=value
When used in write or copy mode, these keyword/value pairs shall be included at the beginning of the archive as typeflag g global extended header records. When used in read or list mode, these keyword/value pairs shall act as if they had been at the beginning of the archive as typeflag g global extended header records.
keyword:=value
When used in write or copy mode, these keyword/value pairs shall be included as records at the beginning of a typeflag x extended header for each file. (This shall be equivalent to the equal-sign form except that it creates no typeflag g global extended header records.) When used in read or list mode, these keyword/value pairs shall act as if they were included as records at the end of each extended header; thus, they shall override any global or file-specific extended header record keywords of the same names. For example, in the command: pax -r -o " gname:=mygroup, " <archive the group name will be forced to a new value for all files read from the archive.
The precedence of keywords over various fields in the archive is described in pax Extended Header Keyword Precedence .
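The keyword[[:]=value][,...] syntax described above for the -o option-argument (backslash-escaped commas, an ignored trailing comma, optional white space before keywords) can be split along these lines; a minimal sketch with a made-up helper name, not the parser pax itself uses:

def split_o_argument(options):
    # Split on unescaped commas; a backslash before a comma is dropped and the
    # comma is kept as part of the value.
    fields, current, i = [], "", 0
    while i < len(options):
        if options[i] == "\\" and i + 1 < len(options) and options[i + 1] == ",":
            current += ","
            i += 2
        elif options[i] == ",":
            fields.append(current)
            current = ""
            i += 1
        else:
            current += options[i]
            i += 1
    fields.append(current)
    # A trailing comma, or a comma followed only by white space, is ignored.
    if fields and fields[-1].strip() == "":
        fields.pop()
    # Keywords may be preceded by white space.
    return [f.lstrip() for f in fields]

# split_o_argument("delete=security.*,comment=nightly\\, full backup,")
#   -> ['delete=security.*', 'comment=nightly, full backup']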
string Specify one or more file characteristic options (privileges). The string option-argument shall be a string specifying file characteristics to be retained or discarded on extraction. The string shall consist of the specification characters a , e , m , o , and p . Other implementation-defined characters can be included. Multiple characteristics can be concatenated within the same string and multiple options can be specified. The meaning of the specification characters are as follows: a
Do not preserve file access times.
e
Preserve the user ID, group ID, file mode bits (see the Base Definitions volume of IEEE Std 1003.1-2001, Section 3.168, File Mode Bits), access time, modification time, and any other implementation-defined file characteristics.
m
Do not preserve file modification times.
o
Preserve the user ID and group ID.
p
Preserve the file mode bits. Other implementation-defined file mode attributes may be preserved.
In the preceding list, "preserve" indicates that an attribute stored in the archive shall be given to the extracted file, subject to the permissions of the invoking process. The access and modification times of the file shall be preserved unless otherwise specified with the option or not stored in the archive. All attributes that are not preserved shall be determined as part of the normal file creation action (see File Read, Write, and Creation ). If neither the e nor the o specification character is specified, or the user ID and group ID are not preserved for any reason, pax shall not set the S_ISUID and S_ISGID bits of the file mode. If the preservation of any of these items fails for any reason, pax shall write a diagnostic message to standard error. Failure to preserve these items shall affect the final exit status, but shall not cause the extracted file to be deleted. If file characteristic letters in any of the string option-arguments are duplicated or conflict with each other, the ones given last shall take precedence. For example, if eme is specified, file modification times are preserved.
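Because later -p characteristics override earlier ones, the effective set can be computed with a single left-to-right pass; a sketch under the assumption that only the standard a, e, m, o, and p letters appear (the dictionary keys are illustrative):

def resolve_privileges(p_strings):
    # By default the access and modification times are preserved; user and
    # group IDs and the file mode bits are not.  Walk every -p option-argument
    # left to right; a later, conflicting character wins, so "eme" ends up
    # preserving modification times.
    keep = {"atime": True, "mtime": True, "ids": False, "mode": False}
    for s in p_strings:
        for ch in s:
            if ch == "a":
                keep["atime"] = False
            elif ch == "m":
                keep["mtime"] = False
            elif ch == "o":
                keep["ids"] = True
            elif ch == "p":
                keep["mode"] = True
            elif ch == "e":
                for key in keep:     # 'e' preserves everything listed here
                    keep[key] = True
    return keep

# resolve_privileges(["eme"])["mtime"] -> True, matching the example above.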
replstr Modify file or archive member names named by pattern or file operands according to the substitution expression replstr, using the syntax of the ed utility. The concepts of "address" and "line" are meaningless in the context of the pax utility, and shall not be supplied. The format shall be: old/new/[gp] where as in ed, old is a basic regular expression and new can contain an ampersand, '\n' (where n is a digit) backreferences, or subexpression matching. The old string shall also be permitted to contain <newline>s. Any non-null character can be used as a delimiter ( '/' shown here). Multiple expressions can be specified; the expressions shall be applied in the order specified, terminating with the first successful substitution. The optional trailing 'g' is as defined in the ed utility. The optional trailing 'p' shall cause successful substitutions to be written to standard error. File or archive member names that substitute to the empty string shall be ignored when reading and writing archives. When reading files from the file system, and if the user has the permissions required by utime() to do so, set the access time of each file read to the access time that it had before being read by pax. Ignore files that are older (having a less recent file modification time) than a pre-existing file or archive member with the same name. In read mode, an archive member with the same name as a file in the file system shall be extracted if the archive member is newer than the file. In write mode, an archive file member with the same name as a file in the file system shall be superseded if the file is newer than the archive member. If is also specified, this is accomplished by appending to the archive; otherwise, it is unspecified whether this is accomplished by actual replacement in the archive or by appending to the archive. In copy mode, the file in the destination hierarchy shall be replaced by the file in the source hierarchy or by a link to the file in the source hierarchy if the file in the source hierarchy is newer. In list mode, produce a verbose table of contents (see the STDOUT section). Otherwise, write archive member pathnames to standard error (see the STDERR section). format Specify the output archive format. The pax utility shall support the following formats: cpio
The cpio interchange format; see the EXTENDED DESCRIPTION section. The default blocksize for this format for character special archive files shall be 5120. Implementations shall support all blocksize values less than or equal to 32256 that are multiples of 512.
pax
The pax interchange format; see the EXTENDED DESCRIPTION section. The default blocksize for this format for character special archive files shall be 5120. Implementations shall support all blocksize values less than or equal to 32256 that are multiples of 512.
ustar
The tar interchange format; see the EXTENDED DESCRIPTION section. The default blocksize for this format for character special archive files shall be 10240. Implementations shall support all blocksize values less than or equal to 32256 that are multiples of 512.
Implementation-defined formats shall specify a default block size as well as any other block sizes supported for character special archive files. Any attempt to append to an archive file in a format different from the existing archive format shall cause pax to exit immediately with a non-zero exit status. In copy mode, if no -x format is specified, pax shall behave as if -x pax were specified.
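The -s replstr behaviour described earlier (expressions tried in order, the first successful substitution terminating the search, an empty result causing the member to be skipped, and the trailing g and p flags) can be approximated as follows; note that this sketch uses Python re syntax rather than ed basic regular expressions, and the helper name is invented:

import re, sys

def apply_substitutions(name, exprs):
    # exprs is a list of (old, new, flags) triples taken from -s /old/new/[gp]
    # options, already split on the chosen delimiter.
    for old, new, flags in exprs:
        count = 0 if "g" in flags else 1
        result, n = re.subn(old, new, name, count=count)
        if n:                          # first successful substitution terminates
            if "p" in flags:           # 'p' reports the substitution on stderr
                sys.stderr.write("%s >> %s\n" % (name, result))
            return result or None      # empty result: ignore this member
    return name

# apply_substitutions("usr/local/bin/tool", [(r"^usr/local/", "opt/", "p")])
#   -> "opt/bin/tool", writing "usr/local/bin/tool >> opt/bin/tool" to stderr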
When traversing the file hierarchy specified by a pathname, pax shall not descend into directories that have a different device ID ( st_dev; see the System Interfaces volume of IEEE Std 1003.1-2001, stat()). The options that operate on the names of files or archive members ( , , , , , and ) shall interact as follows. In read mode, the archive members shall be selected based on the user-specified pattern operands as modified by the , , and options. Then, any and options shall modify, in that order, the names of the selected files. The option shall write names resulting from these modifications. In write mode, the files shall be selected based on the user-specified pathnames as modified by the and options. Then, any and options shall modify, in that order, the names of these selected files. The option shall write names resulting from these modifications. If both the and options are specified, pax shall not consider a file selected unless it is newer than the file to which it is compared. List Mode Format Specifications In list mode with the listopt= format option, the format argument shall be applied for each selected file. The pax utility shall append a <newline> to the listopt output for each selected file. The format argument shall be used as the format string described in the Base Definitions volume of IEEE Std 1003.1-2001, Chapter 5, File Format Notation, with the exceptions 1. through 5. defined in the EXTENDED DESCRIPTION section of printf, plus the following exceptions: 6. The sequence ( keyword) can occur before a format conversion specifier. The conversion argument is defined by the value of keyword. The implementation shall support the following keywords: * Any of the Field Name entries in ustar Header Block and Octet-Oriented cpio Archive Entry . The implementation may support the cpio keywords without the leading c_ in addition to the form required by Values for cpio c_mode Field . * Any keyword defined for the extended header in pax Extended Header . * Any keyword provided as an implementation-defined extension within the extended header defined in pax Extended Header . For example, the sequence "%(charset)s" is the string value of the name of the character set in the extended header. The result of the keyword conversion argument shall be the value from the applicable header field or extended header, without any trailing NULs. All keyword values used as conversion arguments shall be translated from the UTF-8 encoding to the character set appropriate for the local file system, user database, and so on, as applicable. 7. An additional conversion specifier character, T , shall be used to specify time formats. The T conversion specifier character can be preceded by the sequence ( keyword= subformat), where subformat is a date format as defined by date operands. The default keyword shall be mtime and the default subformat shall be: %b %e %H:%M %Y 8. An additional conversion specifier character, M , shall be used to specify the file mode string as defined in ls Standard Output. If ( keyword) is omitted, the mode keyword shall be used. For example, %.1M writes the single character corresponding to the <entry type> field of the ls command. 9. An additional conversion specifier character, D , shall be used to specify the device for block or special files, if applicable, in an implementation-defined format. If not applicable, and ( keyword) is specified, then this conversion shall be equivalent to %(keyword)u. 
If not applicable, and ( keyword) is omitted, then this conversion shall be equivalent to <space>. 10. An additional conversion specifier character, F , shall be used to specify a pathname. The F conversion character can be preceded by a sequence of comma-separated keywords: (keyword[,keyword] ... ) The values for all the keywords that are non-null shall be concatenated together, each separated by a '/' . The default shall be ( path) if the keyword path is defined; otherwise, the default shall be ( prefix, name). 11. An additional conversion specifier character, L , shall be used to specify a symbolic line expansion. If the current file is a symbolic link, then %L shall expand to: "%s -> %s", <value of keyword>, <contents of link> Otherwise, the %L conversion specification shall be the equivalent of %F . OPERANDS The following operands shall be supported: directory The destination directory pathname for copy mode. file A pathname of a file to be copied or archived. pattern A pattern matching one or more pathnames of archive members. A pattern must be given in the name-generating notation of the pattern matching notation in Pattern Matching Notation , including the filename expansion rules in Patterns Used for Filename Expansion . The default, if no pattern is specified, is to select all members in the archive. STDIN In write mode, the standard input shall be used only if no file operands are specified. It shall be a text file containing a list of pathnames, one per line, without leading or trailing <blank>s. In list and read modes, if is not specified, the standard input shall be an archive file. Otherwise, the standard input shall not be used. INPUT FILES The input file named by the archive option-argument, or standard input when the archive is read from there, shall be a file formatted according to one of the specifications in the EXTENDED DESCRIPTION section or some other implementation-defined format. The file /dev/tty shall be used to write prompts and read responses. ENVIRONMENT VARIABLES The following environment variables shall affect the execution of pax: LANG Provide a default value for the internationalization variables that are unset or null. (See the Base Definitions volume of IEEE Std 1003.1-2001, Section 8.2, Internationalization Variables for the precedence of internationalization variables used to determine the values of locale categories.) LC_ALL If set to a non-empty string value, override the values of all the other internationalization variables. LC_COLLATE Determine the locale for the behavior of ranges, equivalence classes, and multi-character collating elements used in the pattern matching expressions for the pattern operand, the basic regular expression for the option, and the extended regular expression defined for the yesexpr locale keyword in the LC_MESSAGES category. LC_CTYPE Determine the locale for the interpretation of sequences of bytes of text data as characters (for example, single-byte as opposed to multi-byte characters in arguments and input files), the behavior of character classes used in the extended regular expression defined for the yesexpr locale keyword in the LC_MESSAGES category, and pattern matching. LC_MESSAGES Determine the locale for the processing of affirmative responses that should be used to affect the format and contents of diagnostic messages written to standard error. LC_TIME Determine the format and contents of date and time strings when the option is specified. 
NLSPATH Determine the location of message catalogs for the processing of LC_MESSAGES . TMPDIR Determine the pathname that provides part of the default global extended header record file, as described for the globexthdr= keyword in the OPTIONS section. TZ Determine the timezone used to calculate date and time strings when the option is specified. If TZ is unset or null, an unspecified default timezone shall be used. ASYNCHRONOUS EVENTS Default. STDOUT In write mode, if is not specified, the standard output shall be the archive formatted according to one of the specifications in the EXTENDED DESCRIPTION section, or some other implementation-defined format (see format). In list mode, when the listopt= format has been specified, the selected archive members shall be written to standard output using the format described under List Mode Format Specifications . In list mode without the listopt= format option, the table of contents of the selected archive members shall be written to standard output using the following format: "%s\n", <pathname> If the option is specified in list mode, the table of contents of the selected archive members shall be written to standard output using the following formats. For pathnames representing hard links to previous members of the archive: "%s == %s\n", <ls listing>, <linkname> For all other pathnames: "%s\n", <ls listing> where <ls  -l listing> shall be the format specified by the ls utility with the option. When writing pathnames in this format, it is unspecified what is written for fields for which the underlying archive format does not have the correct information, although the correct number of <blank>-separated fields shall be written. In list mode, standard output shall not be buffered more than a line at a time. STDERR If is specified in read, write, or copy modes, pax shall write the pathnames it processes to the standard error output using the following format: "%s\n", <pathname> These pathnames shall be written as soon as processing is begun on the file or archive member, and shall be flushed to standard error. The trailing <newline>, which shall not be buffered, is written when the file has been read or written. If the option is specified, and the replacement string has a trailing 'p' , substitutions shall be written to standard error in the following format: "%s >> %s\n", <original pathname>, <new pathname> In all operating modes of pax, optional messages of unspecified format concerning the input archive format and volume number, the number of files, blocks, volumes, and media parts as well as other diagnostic messages may be written to standard error. In all formats, for both standard output and standard error, it is unspecified how non-printable characters in pathnames or link names are written. When pax is in read mode or list mode, using the pax archive format, and a filename, link name, owner name, or any other field in an extended header record cannot be translated from the pax UTF-8 codeset format to the codeset and current locale of the implementation, pax shall write a diagnostic message to standard error, shall process the file as described for the invalid= option, and then shall process the next file in the archive. OUTPUT FILES In read mode, the extracted output files shall be of the archived file type. In copy mode, the copied output files shall be the type of the file being copied. 
In either mode, existing files in the destination hierarchy shall be overwritten only when all permission (-p), modification time (-u), and invalid-value (-o invalid=) tests allow it. In write mode, the output file named by the -f option-argument shall be a file formatted according to one of the specifications in the EXTENDED DESCRIPTION section, or some other implementation-defined format.
Figure: pax Format Archive Example
pax Header Block The pax header block shall be identical to the ustar header block described in ustar Interchange Format , except that two additional typeflag values are defined: x Represents extended header records for the following file in the archive (which shall have its own ustar header block). The format of these extended header records shall be as described in pax Extended Header . g Represents global extended header records for the following files in the archive. The format of these extended header records shall be as described in pax Extended Header . Each value shall affect all subsequent files that do not override that value in their own extended header record and until another global extended header record is reached that provides another value for the same field. The typeflag g global headers should not be used with interchange media that could suffer partial data loss in transporting the archive. For both of these types, the size field shall be the size of the extended header records in octets. The other fields in the header block are not meaningful to this version of the pax utility. However, if this archive is read by a pax utility conforming to the ISO POSIX-2:1993 standard, the header block fields are used to create a regular file that contains the extended header records as data. Therefore, header block field values should be selected to provide reasonable file access to this regular file. A further difference from the ustar header block is that data blocks for files of typeflag 1 (the digit one) (hard link) may be included, which means that the size field may be greater than zero. Archives created by pax linkdata shall include these data blocks with the hard links. pax Extended Header A pax extended header contains values that are inappropriate for the ustar header block because of limitations in that format: fields requiring a character encoding other than that described in the ISO/IEC 646:1991 standard, fields representing file attributes not described in the ustar header, and fields whose format or length do not fit the requirements of the ustar header. The values in an extended header add attributes to the following file (or files; see the description of the typeflag g header block) or override values in the following header block(s), as indicated in the following list of keywords. An extended header shall consist of one or more records, each constructed as follows: "%d %s=%s\n", <length>, <keyword>, <value> The extended header records shall be encoded according to the ISO/IEC 10646-1:2000 standard (UTF-8). The <length> field, <blank>, equals sign, and <newline> shown shall be limited to the portable character set, as encoded in UTF-8. The <keyword> and <value> fields can be any UTF-8 characters. The <length> field shall be the decimal length of the extended header record in octets, including the trailing <newline>. The <keyword> field shall be one of the entries from the following list or a keyword provided as an implementation extension. Keywords consisting entirely of lowercase letters, digits, and periods are reserved for future standardization. A keyword shall not include an equals sign. (In the following list, the notations "file(s)" or "block(s)" is used to acknowledge that a keyword affects the following single file after a typeflag x extended header, but possibly multiple files after typeflag g. 
Any requirements in the list for pax to include a record when in write or copy mode shall apply only when such a record has not already been provided through the use of the option. When used in copy mode, pax shall behave as if an archive had been created with applicable extended header records and then extracted.) atime The file access time for the following file(s), equivalent to the value of the st_atime member of the stat structure for a file, as described by the stat() function. The access time shall be restored if the process has the appropriate privilege required to do so. The format of the <value> shall be as described in pax Extended Header File Times . charset The name of the character set used to encode the data in the following file(s). The entries in the following table are defined to refer to known standards; additional names may be agreed on between the originator and recipient. <value> Formal Standard ISO-IR 646 1990 ISO/IEC 646:1990 ISO-IR 8859 1 1998 ISO/IEC 8859-1:1998 ISO-IR 8859 2 1999 ISO/IEC 8859-2:1999 ISO-IR 8859 3 1999 ISO/IEC 8859-3:1999 ISO-IR 8859 4 1998 ISO/IEC 8859-4:1998 ISO-IR 8859 5 1999 ISO/IEC 8859-5:1999 ISO-IR 8859 6 1999 ISO/IEC 8859-6:1999 ISO-IR 8859 7 1987 ISO/IEC 8859-7:1987 ISO-IR 8859 8 1999 ISO/IEC 8859-8:1999 ISO-IR 8859 9 1999 ISO/IEC 8859-9:1999 ISO-IR 8859 10 1998 ISO/IEC 8859-10:1998 ISO-IR 8859 13 1998 ISO/IEC 8859-13:1998 ISO-IR 8859 14 1998 ISO/IEC 8859-14:1998 ISO-IR 8859 15 1999 ISO/IEC 8859-15:1999 ISO-IR 10646 2000 ISO/IEC 10646:2000 ISO-IR 10646 2000 UTF-8 ISO/IEC 10646, UTF-8 encoding BINARY None. The encoding is included in an extended header for information only; when pax is used as described in IEEE Std 1003.1-2001, it shall not translate the file data into any other encoding. The BINARY entry indicates unencoded binary data. When used in write or copy mode, it is implementation-defined whether pax includes a charset extended header record for a file. comment A series of characters used as a comment. All characters in the <value> field shall be ignored by pax. ctime The file creation time for the following file(s), equivalent to the value of the st_ctime member of the stat structure for a file, as described by the stat() function. The creation time shall be restored if the process has the appropriate privilege required to do so. The format of the <value> shall be as described in pax Extended Header File Times . gid The group ID of the group that owns the file, expressed as a decimal number using digits from the ISO/IEC 646:1991 standard. This record shall override the gid field in the following header block(s). When used in write or copy mode, pax shall include a gid extended header record for each file whose group ID is greater than 2097151 (octal 7777777). gname The group of the file(s), formatted as a group name in the group database. This record shall override the gid and gname fields in the following header block(s), and any gid extended header record. When used in read, copy, or list mode, pax shall translate the name from the UTF-8 encoding in the header record to the character set appropriate for the group database on the receiving system. If any of the UTF-8 characters cannot be translated, and if the invalid= UTF-8 option is not specified, the results are implementation-defined. When used in write or copy mode, pax shall include a gname extended header record for each file whose group name cannot be represented entirely with the letters and digits of the portable character set. 
linkpath The pathname of a link being created to another file, of any type, previously archived. This record shall override the linkname field in the following ustar header block(s). The following ustar header block shall determine the type of link created. If typeflag of the following header block is 1, it shall be a hard link. If typeflag is 2, it shall be a symbolic link and the linkpath value shall be the contents of the symbolic link. The pax utility shall translate the name of the link (contents of the symbolic link) from the UTF-8 encoding to the character set appropriate for the local file system. When used in write or copy mode, pax shall include a linkpath extended header record for each link whose pathname cannot be represented entirely with the members of the portable character set other than NUL. mtime The file modification time of the following file(s), equivalent to the value of the st_mtime member of the stat structure for a file, as described in the stat() function. This record shall override the mtime field in the following header block(s). The modification time shall be restored if the process has the appropriate privilege required to do so. The format of the <value> shall be as described in pax Extended Header File Times . path The pathname of the following file(s). This record shall override the name and prefix fields in the following header block(s). The pax utility shall translate the pathname of the file from the UTF-8 encoding to the character set appropriate for the local file system. When used in write or copy mode, pax shall include a path extended header record for each file whose pathname cannot be represented entirely with the members of the portable character set other than NUL. realtime.any The keywords prefixed by "realtime." are reserved for future standardization. security.any The keywords prefixed by "security." are reserved for future standardization. size The size of the file in octets, expressed as a decimal number using digits from the ISO/IEC 646:1991 standard. This record shall override the size field in the following header block(s). When used in write or copy mode, pax shall include a size extended header record for each file with a size value greater than 8589934591 (octal 77777777777). uid The user ID of the file owner, expressed as a decimal number using digits from the ISO/IEC 646:1991 standard. This record shall override the uid field in the following header block(s). When used in write or copy mode, pax shall include a uid extended header record for each file whose owner ID is greater than 2097151 (octal 7777777). uname The owner of the following file(s), formatted as a user name in the user database. This record shall override the uid and uname fields in the following header block(s), and any uid extended header record. When used in read, copy, or list mode, pax shall translate the name from the UTF-8 encoding in the header record to the character set appropriate for the user database on the receiving system. If any of the UTF-8 characters cannot be translated, and if the invalid= UTF-8 option is not specified, the results are implementation-defined. When used in write or copy mode, pax shall include a uname extended header record for each file whose user name cannot be represented entirely with the letters and digits of the portable character set. If the <value> field is zero length, it shall delete any header block field, previously entered extended header value, or global extended header value of the same name. 
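The "%d %s=%s\n" record layout is easy to get wrong because the leading length counts its own digits as well as the rest of the record; the following sketch (a standalone illustration, not code from pax or doclifter) shows one way to compute it:

def extended_header_record(keyword, value):
    # <length> is the total record length in octets, including the length
    # digits themselves, the blank, the '=', and the trailing newline;
    # keyword and value are encoded as UTF-8.
    body = " %s=%s\n" % (keyword, value)
    body_len = len(body.encode("utf-8"))
    length = body_len + len(str(body_len))
    if len(str(length)) != len(str(body_len)):
        # Adding the length digits pushed the total across a power of ten.
        length = body_len + len(str(length))
    return "%d%s" % (length, body)

# extended_header_record("path", "home/dir/file") -> "22 path=home/dir/file\n"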
If a keyword in an extended header record (or in a option-argument) overrides or deletes a corresponding field in the ustar header block, pax shall ignore the contents of that header block field. Unlike the ustar header block fields, NULs shall not delimit <value>s; all characters within the <value> field shall be considered data for the field. None of the length limitations of the ustar header block fields in ustar Header Block shall apply to the extended header records. pax Extended Header Keyword Precedence This section describes the precedence in which the various header records and fields and command line options are selected to apply to a file in the archive. When pax is used in read or list modes, it shall determine a file attribute in the following sequence: 1. If delete= keyword-prefix is used, the affected attributes shall be determined from step 7., if applicable, or ignored otherwise. 2. If keyword:= is used, the affected attributes shall be ignored. 3. If keyword := value is used, the affected attribute shall be assigned the value. 4. If there is a typeflag x extended header record, the affected attribute shall be assigned the <value>. When extended header records conflict, the last one given in the header shall take precedence. 5. If keyword = value is used, the affected attribute shall be assigned the value. 6. If there is a typeflag g global extended header record, the affected attribute shall be assigned the <value>. When global extended header records conflict, the last one given in the global header shall take precedence. 7. Otherwise, the attribute shall be determined from the ustar header block. pax Extended Header File Times The pax utility shall write an mtime record for each file in write or copy modes if the file's modification time cannot be represented exactly in the ustar header logical record described in ustar Interchange Format . This can occur if the time is out of ustar range, or if the file system of the underlying implementation supports non-integer time granularities and the time is not an integer. All of these time records shall be formatted as a decimal representation of the time in seconds since the Epoch. If a period ( '.' ) decimal point character is present, the digits to the right of the point shall represent the units of a subsecond timing granularity, where the first digit is tenths of a second and each subsequent digit is a tenth of the previous digit. In read or copy mode, the pax utility shall truncate the time of a file to the greatest value that is not greater than the input header file time. In write or copy mode, the pax utility shall output a time exactly if it can be represented exactly as a decimal number, and otherwise shall generate only enough digits so that the same time shall be recovered if the file is extracted on a system whose underlying implementation supports the same time granularity. ustar Interchange Format A ustar archive tape or file shall contain a series of logical records. Each logical record shall be a fixed-size logical record of 512 octets (see below). Although this format may be thought of as being stored on 9-track industry-standard 12.7 mm (0.5 in) magnetic tape, other types of transportable media are not excluded. Each file archived shall be represented by a header logical record that describes the file, followed by zero or more logical records that give the contents of the file. 
At the end of the archive file there shall be two 512-octet logical records filled with binary zeros, interpreted as an end-of-archive indicator. The logical records may be grouped for physical I/O operations, as described under the blocksize and ustar options. Each group of logical records may be written with a single operation equivalent to the write() function. On magnetic tape, the result of this write shall be a single tape physical block. The last physical block shall always be the full size, so logical records after the two zero logical records may contain undefined data. The header logical record shall be structured as shown in the following table. All lengths and offsets are in decimal.
Table: ustar Header Block

Field Name    Octet Offset    Length (in Octets)
name          0               100
mode          100             8
uid           108             8
gid           116             8
size          124             12
mtime         136             12
chksum        148             8
typeflag      156             1
linkname      157             100
magic         257             6
version       263             2
uname         265             32
gname         297             32
devmajor      329             8
devminor      337             8
prefix        345             155
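Given the octet offsets and lengths in the table above, one ustar header logical record can be cut into named fields by plain slicing; a minimal sketch (numeric-field conversion is left to the caller, and the function and constant names are invented for illustration):

# (name, offset, length) triples taken from the ustar header block table above.
USTAR_FIELDS = (
    ("name", 0, 100), ("mode", 100, 8), ("uid", 108, 8), ("gid", 116, 8),
    ("size", 124, 12), ("mtime", 136, 12), ("chksum", 148, 8),
    ("typeflag", 156, 1), ("linkname", 157, 100), ("magic", 257, 6),
    ("version", 263, 2), ("uname", 265, 32), ("gname", 297, 32),
    ("devmajor", 329, 8), ("devminor", 337, 8), ("prefix", 345, 155),
)

def split_ustar_header(block):
    # block is one 512-octet header logical record read from the archive.
    fields = {}
    for name, offset, length in USTAR_FIELDS:
        raw = block[offset:offset + length]
        # Keep everything up to the first NUL; numeric fields remain as
        # zero-filled octal text terminated by space or NUL.
        fields[name] = raw.split(b"\x00", 1)[0]
    return fields

# with open("archive.tar", "rb") as fp:
#     header = split_ustar_header(fp.read(512))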
All characters in the header logical record shall be represented in the coded character set of the ISO/IEC 646:1991 standard. For maximum portability between implementations, names should be selected from characters represented by the portable filename character set as octets with the most significant bit zero. If an implementation supports the use of characters outside of slash and the portable filename character set in names for files, users, and groups, one or more implementation-defined encodings of these characters shall be provided for interchange purposes. However, the pax utility shall never create filenames on the local system that cannot be accessed via the procedures described in IEEE Std 1003.1-2001. If a filename is found on the medium that would create an invalid filename, it is implementation-defined whether the data from the file is stored on the file hierarchy and under what name it is stored. The pax utility may choose to ignore these files as long as it produces an error indicating that the file is being ignored.

Each field within the header logical record is contiguous; that is, there is no padding used. Each character on the archive medium shall be stored contiguously. The fields magic, uname, and gname are character strings each terminated by a NUL character. The fields name, linkname, and prefix are NUL-terminated character strings except when all characters in the array contain non-NUL characters including the last character. The version field is two octets containing the characters "00" (zero-zero). The typeflag contains a single character. All other fields are leading zero-filled octal numbers using digits from the ISO/IEC 646:1991 standard IRV. Each numeric field is terminated by one or more <space> or NUL characters.

The name and the prefix fields shall produce the pathname of the file. A new pathname shall be formed, if prefix is not an empty string (its first character is not NUL), by concatenating prefix (up to the first NUL character), a slash character, and name; otherwise, name is used alone. In either case, name is terminated at the first NUL character. If prefix begins with a NUL character, it shall be ignored. In this manner, pathnames of at most 256 characters can be supported. If a pathname does not fit in the space provided, pax shall notify the user of the error, and shall not store any part of the file (header or data) on the medium. The linkname field, described below, shall not use the prefix to produce a pathname. As such, a linkname is limited to 100 characters. If the name does not fit in the space provided, pax shall notify the user of the error, and shall not attempt to store the link on the medium.

The mode field provides 12 bits encoded in the ISO/IEC 646:1991 standard octal digit representation. The encoded bits shall represent the following values:
Table: ustar mode Field

    Bit Value    IEEE Std 1003.1-2001 Bit    Description
    04000        S_ISUID                     Set UID on execution.
    02000        S_ISGID                     Set GID on execution.
    01000        <reserved>                  Reserved for future standardization.
    00400        S_IRUSR                     Read permission for file owner class.
    00200        S_IWUSR                     Write permission for file owner class.
    00100        S_IXUSR                     Execute/search permission for file owner class.
    00040        S_IRGRP                     Read permission for file group class.
    00020        S_IWGRP                     Write permission for file group class.
    00010        S_IXGRP                     Execute/search permission for file group class.
    00004        S_IROTH                     Read permission for file other class.
    00002        S_IWOTH                     Write permission for file other class.
    00001        S_IXOTH                     Execute/search permission for file other class.
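As a non-normative illustration of the bit assignments above (names invented for the example; the reserved 01000 bit is left out), a small Python sketch turns the octal mode field of a header block into the symbolic names from the table:

    # Bit values and symbolic names from the table above.
    USTAR_MODE_BITS = (
        (0o4000, "S_ISUID"), (0o2000, "S_ISGID"),
        (0o0400, "S_IRUSR"), (0o0200, "S_IWUSR"), (0o0100, "S_IXUSR"),
        (0o0040, "S_IRGRP"), (0o0020, "S_IWGRP"), (0o0010, "S_IXGRP"),
        (0o0004, "S_IROTH"), (0o0002, "S_IWOTH"), (0o0001, "S_IXOTH"),
    )

    def decode_mode(field):
        "Decode the octal 'mode' field (e.g. b'0000644 ') into bit names."
        # Numeric ustar fields are octal digits terminated by <space> or NUL.
        value = int(field.rstrip(b"\0 ") or b"0", 8)
        return [name for bit, name in USTAR_MODE_BITS if value & bit]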
When appropriate privilege is required to set one of these mode bits, and the user restoring the files from the archive does not have the appropriate privilege, the mode bits for which the user does not have appropriate privilege shall be ignored. Some of the mode bits in the archive format are not mentioned elsewhere in this volume of IEEE Std 1003.1-2001. If the implementation does not support those bits, they may be ignored. The uid and gid fields are the user and group ID of the owner and group of the file, respectively. The size field is the size of the file in octets. If the typeflag field is set to specify a file to be of type 1 (a link) or 2 (a symbolic link), the size field shall be specified as zero. If the typeflag field is set to specify a file of type 5 (directory), the size field shall be interpreted as described under the definition of that record type. No data logical records are stored for types 1, 2, or 5. If the typeflag field is set to 3 (character special file), 4 (block special file), or 6 (FIFO), the meaning of the size field is unspecified by this volume of IEEE Std 1003.1-2001, and no data logical records shall be stored on the medium. Additionally, for type 6, the size field shall be ignored when reading. If the typeflag field is set to any other value, the number of logical records written following the header shall be ( size+511)/512, ignoring any fraction in the result of the division. The mtime field shall be the modification time of the file at the time it was archived. It is the ISO/IEC 646:1991 standard representation of the octal value of the modification time obtained from the stat() function. The chksum field shall be the ISO/IEC 646:1991 standard IRV representation of the octal value of the simple sum of all octets in the header logical record. Each octet in the header shall be treated as an unsigned value. These values shall be added to an unsigned integer, initialized to zero, the precision of which is not less than 17 bits. When calculating the checksum, the chksum field is treated as if it were all spaces. The typeflag field specifies the type of file archived. If a particular implementation does not recognize the type, or the user does not have appropriate privilege to create that type, the file shall be extracted as if it were a regular file if the file type is defined to have a meaning for the size field that could cause data logical records to be written on the medium (see the previous description for size). If conversion to a regular file occurs, the pax utility shall produce an error indicating that the conversion took place. All of the typeflag fields shall be coded in the ISO/IEC 646:1991 standard IRV: 0 Represents a regular file. For backwards-compatibility, a typeflag value of binary zero ( '\0' ) should be recognized as meaning a regular file when extracting files from the archive. Archives written with this version of the archive file format create regular files with a typeflag value of the ISO/IEC 646:1991 standard IRV '0' . 1 Represents a file linked to another file, of any type, previously archived. Such files are identified by each file having the same device and file serial number. The linked-to name is specified in the linkname field with a NUL-character terminator if it is less than 100 octets in length. 2 Represents a symbolic link. The contents of the symbolic link shall be stored in the linkname field. 3,4 Represent character special files and block special files respectively. 
In this case the devmajor and devminor fields shall contain information defining the device, the format of which is unspecified by this volume of IEEE Std 1003.1-2001. Implementations may map the device specifications to their own local specification or may ignore the entry. 5 Specifies a directory or subdirectory. On systems where disk allocation is performed on a directory basis, the size field shall contain the maximum number of octets (which may be rounded to the nearest disk block allocation unit) that the directory may hold. A size field of zero indicates no such limiting. Systems that do not support limiting in this manner should ignore the size field. 6 Specifies a FIFO special file. Note that the archiving of a FIFO file archives the existence of this file and not its contents. 7 Reserved to represent a file to which an implementation has associated some high-performance attribute. Implementations without such extensions should treat this file as a regular file (type 0). A-Z The letters 'A' to 'Z' , inclusive, are reserved for custom implementations. All other values are reserved for future versions of IEEE Std 1003.1-2001. Attempts to archive a socket using ustar interchange format shall produce a diagnostic message. Handling of other file types is implementation-defined. The magic field is the specification that this archive was output in this archive format. If this field contains ustar (the five characters from the ISO/IEC 646:1991 standard IRV shown followed by NUL), the uname and gname fields shall contain the ISO/IEC 646:1991 standard IRV representation of the owner and group of the file, respectively (truncated to fit, if necessary). When the file is restored by a privileged, protection-preserving version of the utility, the user and group databases shall be scanned for these names. If found, the user and group IDs contained within these files shall be used rather than the values contained within the uid and gid fields.
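The chksum computation described above (every octet of the header summed as an unsigned value, with the eight chksum octets treated as spaces) can be written down in a few lines. This non-normative Python sketch assumes the 512-octet header block is held in a bytes object; the helper names are invented for the example:

    def ustar_checksum(block):
        "Sum all 512 octets, treating the chksum field (offsets 148-155) as spaces."
        as_if_blank = block[:148] + b" " * 8 + block[156:]
        return sum(bytearray(as_if_blank))

    def checksum_matches(block):
        "Compare the computed sum with the stored octal chksum field."
        stored = int(block[148:156].rstrip(b"\0 ") or b"0", 8)
        return ustar_checksum(block) == stored

The precision requirement (at least 17 bits) is moot here because Python integers are unbounded.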
cpio Interchange Format The octet-oriented cpio archive format shall be a series of entries, each comprising a header that describes the file, the name of the file, and then the contents of the file. An archive may be recorded as a series of fixed-size blocks of octets. This blocking shall be used only to make physical I/O more efficient. The last group of blocks shall always be at the full size. For the octet-oriented cpio archive format, the individual entry information shall be in the order indicated and described by the following table; see also the <cpio.h> header.
Table: Octet-Oriented cpio Archive Entry

    Header
    Field Name    Length (in Octets)    Interpreted as
    c_magic       6                     Octal number
    c_dev         6                     Octal number
    c_ino         6                     Octal number
    c_mode        6                     Octal number
    c_uid         6                     Octal number
    c_gid         6                     Octal number
    c_nlink       6                     Octal number
    c_rdev        6                     Octal number
    c_mtime       11                    Octal number
    c_namesize    6                     Octal number
    c_filesize    11                    Octal number

    Filename
    Field Name    Length                Interpreted as
    c_name        c_namesize            Pathname string

    File Data
    Field Name    Length                Interpreted as
    c_filedata    c_filesize            Data
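As a non-normative illustration of the table above (names invented for the example), the following Python sketch reads the 76 octets of fixed-width octal header fields into a dictionary, checking the c_magic value described below:

    import collections

    # (field, length in octets) pairs, in archive order, from the table above.
    CPIO_FIELDS = (("c_magic", 6), ("c_dev", 6), ("c_ino", 6), ("c_mode", 6),
                   ("c_uid", 6), ("c_gid", 6), ("c_nlink", 6), ("c_rdev", 6),
                   ("c_mtime", 11), ("c_namesize", 6), ("c_filesize", 11))

    def parse_cpio_header(data):
        "Read one octet-oriented cpio header (76 octets of octal digits)."
        header, offset = collections.OrderedDict(), 0
        for name, length in CPIO_FIELDS:
            header[name] = int(data[offset:offset + length], 8)
            offset += length
        if header["c_magic"] != 0o070707:   # the string "070707", read as octal
            raise ValueError("not an octet-oriented cpio entry")
        return header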
cpio Header

For each file in the archive, a header as defined previously shall be written. The information in the header fields is written as streams of the ISO/IEC 646:1991 standard characters interpreted as octal numbers. The octal numbers shall be extended to the necessary length by appending the ISO/IEC 646:1991 standard IRV zeros at the most-significant-digit end of the number; the result is written to the most-significant digit of the stream of octets first. The fields shall be interpreted as follows:

c_magic         Identify the archive as being a transportable archive by containing the identifying value "070707".
c_dev, c_ino    Contains values that uniquely identify the file within the archive (that is, no files contain the same pair of c_dev and c_ino values unless they are links to the same file). The values shall be determined in an unspecified manner.
c_mode          Contains the file type and access permissions as defined in the following table.
Table: Values for cpio c_mode Field

    File Permissions
    Name        Value      Indicates
    C_IRUSR     000400     Read by owner
    C_IWUSR     000200     Write by owner
    C_IXUSR     000100     Execute by owner
    C_IRGRP     000040     Read by group
    C_IWGRP     000020     Write by group
    C_IXGRP     000010     Execute by group
    C_IROTH     000004     Read by others
    C_IWOTH     000002     Write by others
    C_IXOTH     000001     Execute by others
    C_ISUID     004000     Set uid
    C_ISGID     002000     Set gid
    C_ISVTX     001000     Reserved

    File Type
    Name        Value      Indicates
    C_ISDIR     040000     Directory
    C_ISFIFO    010000     FIFO
    C_ISREG     0100000    Regular file
    C_ISLNK     0120000    Symbolic link
    C_ISBLK     060000     Block special file
    C_ISCHR     020000     Character special file
    C_ISSOCK    0140000    Socket
    C_ISCTG     0110000    Reserved
Directories, FIFOs, symbolic links, and regular files shall be supported on a system conforming to this volume of IEEE Std 1003.1-2001; additional values defined previously are reserved for compatibility with existing systems. Additional file types may be supported; however, such files should not be written to archives intended to be transported to other systems.
c_uid           Contains the user ID of the owner.
c_gid           Contains the group ID of the group.
c_nlink         Contains the number of links referencing the file at the time the archive was created.
c_rdev          Contains implementation-defined information for character or block special files.
c_mtime         Contains the latest time of modification of the file at the time the archive was created.
c_namesize      Contains the length of the pathname, including the terminating NUL character.
c_filesize      Contains the length of the file in octets. This shall be the length of the data section following the header structure.
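A small, non-normative Python sketch (names invented for the example) makes the c_mode split concrete: the file-type values from the table above live in the high bits, and the permission bits in the low nine:

    # File-type values from the c_mode table above.
    CPIO_FILETYPES = {
        0o040000: "directory",      0o010000: "FIFO",
        0o100000: "regular file",   0o120000: "symbolic link",
        0o060000: "block special",  0o020000: "character special",
        0o140000: "socket",         0o110000: "reserved (contiguous)",
    }

    def cpio_file_type(c_mode):
        "Map the file-type portion of c_mode to a readable name."
        return CPIO_FILETYPES.get(c_mode & 0o170000, "unknown")

    def cpio_permissions(c_mode):
        "Return the nine low permission bits as an octal string."
        return "%03o" % (c_mode & 0o777)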
cpio Filename The c_name field shall contain the pathname of the file. The length of this field in octets is the value of c_namesize. If a filename is found on the medium that would create an invalid pathname, it is implementation-defined whether the data from the file is stored on the file hierarchy and under what name it is stored. All characters shall be represented in the ISO/IEC 646:1991 standard IRV. For maximum portability between implementations, names should be selected from characters represented by the portable filename character set as octets with the most significant bit zero. If an implementation supports the use of characters outside the portable filename character set in names for files, users, and groups, one or more implementation-defined encodings of these characters shall be provided for interchange purposes. However, the pax utility shall never create filenames on the local system that cannot be accessed via the procedures described previously in this volume of IEEE Std 1003.1-2001. If a filename is found on the medium that would create an invalid filename, it is implementation-defined whether the data from the file is stored on the local file system and under what name it is stored. The pax utility may choose to ignore these files as long as it produces an error indicating that the file is being ignored. cpio File Data Following c_name, there shall be c_filesize octets of data. Interpretation of such data occurs in a manner dependent on the file. If c_filesize is zero, no data shall be contained in c_filedata. When restoring from an archive: * If the user does not have the appropriate privilege to create a file of the specified type, pax shall ignore the entry and write an error message to standard error. * Only regular files have data to be restored. Presuming a regular file meets any selection criteria that might be imposed on the format-reading utility by the user, such data shall be restored. * If a user does not have appropriate privilege to set a particular mode flag, the flag shall be ignored. Some of the mode flags in the archive format are not mentioned elsewhere in this volume of IEEE Std 1003.1-2001. If the implementation does not support those flags, they may be ignored. cpio Special Entries FIFO special files, directories, and the trailer shall be recorded with c_filesize equal to zero. For other special files, c_filesize is unspecified by this volume of IEEE Std 1003.1-2001. The header for the next file entry in the archive shall be written directly after the last octet of the file entry preceding it. A header denoting the filename TRAILER!!! shall indicate the end of the archive; the contents of octets in the last block of the archive following such a header are undefined.
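Putting the pieces together, the following non-normative Python sketch walks an octet-oriented cpio archive entry by entry until the TRAILER!!! entry, reusing the parse_cpio_header() sketch shown earlier; it assumes the whole archive is already in memory and that the stored names use the portable filename character set:

    def walk_cpio(data):
        "Yield (header, name, filedata) for each entry up to the trailer."
        offset = 0
        while True:
            header = parse_cpio_header(data[offset:offset + 76])
            offset += 76
            # c_namesize counts the terminating NUL; drop it from the name.
            name = data[offset:offset + header["c_namesize"] - 1].decode("ascii")
            offset += header["c_namesize"]
            if name == "TRAILER!!!":
                return
            filedata = data[offset:offset + header["c_filesize"]]
            offset += header["c_filesize"]
            yield header, name, filedata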
EXIT STATUS

The following exit values shall be returned:

0     All files were processed successfully.
>0    An error occurred.

CONSEQUENCES OF ERRORS

If pax cannot create a file or a link when reading an archive or cannot find a file when writing an archive, or cannot preserve the user ID, group ID, or file mode when the option is specified, a diagnostic message shall be written to standard error and a non-zero exit status shall be returned, but processing shall continue. In the case where pax cannot create a link to a file, pax shall not, by default, create a second copy of the file. If the extraction of a file from an archive is prematurely terminated by a signal or error, pax may have only partially extracted the file or (if the option was not specified) may have extracted a file of the same name as that specified by the user, but which is not the file the user wanted. Additionally, the file modes of extracted directories may have additional bits from the S_IRWXU mask set as well as incorrect modification and access times. The following sections are informative.

APPLICATION USAGE

The (privileges) option was invented to reconcile differences between historical tar and cpio implementations. In particular, the two utilities use in diametrically opposed ways. The option also provides a consistent means of extending the ways in which future file attributes can be addressed, such as for enhanced security systems or high-performance files. Although it may seem complex, there are really two modes that are most commonly used:

"Preserve everything". This would be used by the historical superuser, someone with all the appropriate privileges, to preserve all aspects of the files as they are recorded in the archive. The e flag is the sum of o and p, and other implementation-defined attributes.

"Preserve" the file mode bits. This would be used by the user with regular privileges who wished to preserve aspects of the file other than the ownership. The file times are preserved by default, but two other flags are offered to disable these and use the time of extraction.

The one pathname per line format of standard input precludes pathnames containing <newline>s. Although such pathnames violate the portable filename guidelines, they may exist and their presence may inhibit usage of pax within shell scripts. This problem is inherited from historical archive programs. The problem can be avoided by listing filename arguments on the command line instead of on standard input. It is almost certain that appropriate privileges are required for pax to accomplish parts of this volume of IEEE Std 1003.1-2001. Specifically, creating files of type block special or character special, restoring file access times unless the files are owned by the user (the option), or preserving file owner, group, and mode (the option) all probably require appropriate privileges. In read mode, implementations are permitted to overwrite files when the archive has multiple members with the same name. This may fail if permissions on the first version of the file do not permit it to be overwritten. The cpio and ustar formats can only support files up to 8589934592 bytes (8 * 2^30) in size.

EXAMPLES

The following command:

    pax -w -f /dev/rmt/1m .

copies the contents of the current directory to tape drive 1, medium density (assuming historical System V device naming procedures; the historical BSD device name would be /dev/rmt9). The following commands:

    mkdir newdir
    pax -rw olddir newdir

copy the olddir directory hierarchy to newdir.
pax -r -s ',^//*usr//*,,' -f a.pax reads the archive a.pax, with all files rooted in /usr in the archive extracted relative to the current directory. Using the option: overrides the default output description in Standard Output and instead writes: Using the options: overrides the default output description in Standard Output and instead writes: /usr/foo/bar -> /tmp 1492 /usr/fo Jan 12 1991 Jan 31 15:53 RATIONALE The pax utility was new for the ISO POSIX-2:1993 standard. It represents a peaceful compromise between advocates of the historical tar and cpio utilities. A fundamental difference between cpio and tar was in the way directories were treated. The cpio utility did not treat directories differently from other files, and to select a directory and its contents required that each file in the hierarchy be explicitly specified. For tar, a directory matched every file in the file hierarchy it rooted. The pax utility offers both interfaces; by default, directories map into the file hierarchy they root. The option causes pax to skip any file not explicitly referenced, as cpio historically did. The tar - style behavior was chosen as the default because it was believed that this was the more common usage and because tar is the more commonly available interface, as it was historically provided on both System V and BSD implementations. The data interchange format specification in this volume of IEEE Std 1003.1-2001 requires that processes with "appropriate privileges" shall always restore the ownership and permissions of extracted files exactly as archived. If viewed from the historic equivalence between superuser and "appropriate privileges", there are two problems with this requirement. First, users running as superusers may unknowingly set dangerous permissions on extracted files. Second, it is needlessly limiting, in that superusers cannot extract files and own them as superuser unless the archive was created by the superuser. (It should be noted that restoration of ownerships and permissions for the superuser, by default, is historical practice in cpio, but not in tar.) In order to avoid these two problems, the pax specification has an additional "privilege" mechanism, the option. Only a pax invocation with the privileges needed, and which has the option set using the e specification character, has the "appropriate privilege" to restore full ownership and permission information. Note also that this volume of IEEE Std 1003.1-2001 requires that the file ownership and access permissions shall be set, on extraction, in the same fashion as the creat() function when provided with the mode stored in the archive. This means that the file creation mask of the user is applied to the file permissions. Users should note that directories may be created by pax while extracting files with permissions that are different from those that existed at the time the archive was created. When extracting sensitive information into a directory hierarchy that no longer exists, users are encouraged to set their file creation mask appropriately to protect these files during extraction. The table of contents output is written to standard output to facilitate pipeline processing. An early proposal had hard links displaying for all pathnames. This was removed because it complicates the output of the case where is not specified and does not match historical cpio usage. The hard-link information is available in the display. The description of the option allows implementations to make hard links to symbolic links. 
IEEE Std 1003.1-2001 does not specify any way to create a hard link to a symbolic link, but many implementations provide this capability as an extension. If there are hard links to symbolic links when an archive is created, the implementation is required to archive the hard link in the archive (unless or is specified). When in read mode and in copy mode, implementations supporting hard links to symbolic links should use them when appropriate. The archive formats inherited from the POSIX.1-1990 standard have certain restrictions that have been brought along from historical usage. For example, there are restrictions on the length of pathnames stored in the archive. When pax is used in copy( ) mode (copying directory hierarchies), the ability to use extensions from the pax format overcomes these restrictions. The default blocksize value of 5120 bytes for cpio was selected because it is one of the standard block-size values for cpio, set when the option is specified. (The other default block-size value for cpio is 512 bytes, and this was considered to be too small.) The default block value of 10240 bytes for tar was selected because that is the standard block-size value for BSD tar. The maximum block size of 32256 bytes (2**15-512 bytes) is the largest multiple of 512 bytes that fits into a signed 16-bit tape controller transfer register. There are known limitations in some historical systems that would prevent larger blocks from being accepted. Historical values were chosen to improve compatibility with historical scripts using dd or similar utilities to manipulate archives. Also, default block sizes for any file type other than character special file has been deleted from this volume of IEEE Std 1003.1-2001 as unimportant and not likely to affect the structure of the resulting archive. Implementations are permitted to modify the block-size value based on the archive format or the device to which the archive is being written. This is to provide implementations with the opportunity to take advantage of special types of devices, and it should not be used without a great deal of consideration as it almost certainly decreases archive portability. The intended use of the option was to permit extraction of one or more files from the archive without processing the entire archive. This was viewed by the standard developers as offering significant performance advantages over historical implementations. The option in early proposals had three effects; the first was to cause special characters in patterns to not be treated specially. The second was to cause only the first file that matched a pattern to be extracted. The third was to cause pax to write a diagnostic message to standard error when no file was found matching a specified pattern. Only the second behavior is retained by this volume of IEEE Std 1003.1-2001, for many reasons. First, it is in general not acceptable for a single option to have multiple effects. Second, the ability to make pattern matching characters act as normal characters is useful for parts of pax other than file extraction. Third, a finer degree of control over the special characters is useful because users may wish to normalize only a single special character in a single filename. Fourth, given a more general escape mechanism, the previous behavior of the option can be easily obtained using the option or a sed script. Finally, writing a diagnostic message when a pattern specified by the user is unmatched by any file is useful behavior in all cases. 
In this version, the was removed from the copy mode synopsis of pax; it is inapplicable because there are no pattern operands specified in this mode. There is another method than pax for copying subtrees in IEEE Std 1003.1-2001 described as part of the cp utility. Both methods are historical practice: cp provides a simpler, more intuitive interface, while pax offers a finer granularity of control. Each provides additional functionality to the other; in particular, pax maintains the hard-link structure of the hierarchy while cp does not. It is the intention of the standard developers that the results be similar (using appropriate option combinations in both utilities). The results are not required to be identical; there seemed insufficient gain to applications to balance the difficulty of implementations having to guarantee that the results would be exactly identical. A single archive may span more than one file. It is suggested that implementations provide informative messages to the user on standard error whenever the archive file is changed. The option (do not create intermediate directories not listed in the archive) found in early proposals was originally provided as a complement to the historic option of cpio. It has been deleted. The option in early proposals specified a subset of the substitution command from the ed utility. As there was no reason for only a subset to be supported, the option is now compatible with the current ed specification. Since the delimiter can be any non-null character, the following usage with single spaces is valid: pax -s " foo bar " ... The description is worded so as to note that this may cause the access time update caused by some other activity (which occurs while the file is being read) to be overwritten. The default behavior of pax with regard to file modification times is the same as historical implementations of tar. It is not the historical behavior of cpio. Because the option uses /dev/tty, utilities without a controlling terminal are not able to use this option. The option, found in early proposals, has been deleted because a line containing a single period for the option has equivalent functionality. The special lines for the option (a single period and the empty line) are historical practice in cpio. In early drafts, a charmap option was included to increase portability of files between systems using different coded character sets. This option was omitted because it was apparent that consensus could not be formed for it. In this version, the use of UTF-8 should be an adequate substitute. The option was added to address international concerns about the dangers involved in the character set transformations of (if the target character set were different from the source, the filenames might be transformed into names matching existing files) and also was made more general to protect files transferred between file systems with different {NAME_MAX} values (truncating a filename on a smaller system might also inadvertently overwrite existing files). As stated, it prevents any overwriting, even if the target file is older than the source. This version adds more granularity of options to solve this problem by introducing the invalid= option-specifically the UTF-8 action. (Note that an existing file that is named with a UTF-8 encoding is still subject to overwriting in this case. The option closes that loophole.) Some of the file characteristics referenced in this volume of IEEE Std 1003.1-2001 might not be supported by some archive formats. 
For example, neither the tar nor cpio formats contain the file access time. For this reason, the e specification character has been provided, intended to cause all file characteristics specified in the archive to be retained. It is required that extracted directories, by default, have their access and modification times and permissions set to the values specified in the archive. This has obvious problems in that the directories are almost certainly modified after being extracted and that directory permissions may not permit file creation. One possible solution is to create directories with the mode specified in the archive, as modified by the umask of the user, with sufficient permissions to allow file creation. After all files have been extracted, pax would then reset the access and modification times and permissions as necessary. The list-mode formatting description borrows heavily from the one defined by the printf utility. However, since there is no separate operand list to get conversion arguments, the format was extended to allow specifying the name of the conversion argument as part of the conversion specification. The T conversion specifier allows time fields to be displayed in any of the date formats. Unlike the ls utility, pax does not adjust the format when the date is less than six months in the past. This makes parsing the output more predictable. The D conversion specifier handles the ability to display the major/minor or file size, as with ls, by using %-8(size)D. The L conversion specifier handles the ls display for symbolic links. Conversion specifiers were added to generate existing known types used for ls. pax Interchange Format The new POSIX data interchange format was developed primarily to satisfy international concerns that the ustar and cpio formats did not provide for file, user, and group names encoded in characters outside a subset of the ISO/IEC 646:1991 standard. The standard developers realized that this new POSIX data interchange format should be very extensible because there were other requirements they foresaw in the near future: * Support international character encodings and locale information * Support security information (ACLs, and so on) * Support future file types, such as realtime or contiguous files * Include data areas for implementation use * Support systems with words larger than 32 bits and timers with subsecond granularity The following were not goals for this format because these are better handled by separate utilities or are inappropriate for a portable format: * Encryption * Compression * Data translation between locales and codesets * inode storage The format chosen to support the goals is an extension of the ustar format. Of the two formats previously available, only the ustar format was selected for extensions because: * It was easier to extend in an upwards-compatible way. It offered version flags and header block type fields with room for future standardization. The cpio format, while possessing a more flexible file naming methodology, could not be extended without breaking some theoretical implementation or using a dummy filename that could be a legitimate filename. * Industry experience since the original " tar wars" fought in developing the ISO POSIX-1 standard has clearly been in favor of the ustar format, which is generally the default output format selected for pax implementations on new systems. 
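The extended header records themselves, described earlier in this page, are simple "<length> <keyword>=<value>" lines, but the leading decimal <length> counts every octet of the record including its own digits, so a writer has to recompute it until it settles. A non-normative Python sketch (function name invented for the example):

    def extended_header_record(keyword, value):
        "Encode one '<length> <keyword>=<value>\\n' extended header record."
        body = (" %s=%s\n" % (keyword, value)).encode("utf-8")
        length = len(body)
        # The length field counts its own digits, so iterate until stable.
        while len(str(length)) + len(body) != length:
            length = len(str(length)) + len(body)
        return str(length).encode("ascii") + body

For example, extended_header_record("mtime", "1234567890.5") yields the 22-octet record b"22 mtime=1234567890.5\n".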
The new format was designed with one additional goal in mind: reasonable behavior when an older tar or pax utility happened to read an archive. Since the POSIX.1-1990 standard mandated that a "format-reading utility" had to treat unrecognized typeflag values as regular files, this allowed the format to include all the extended information in a pseudo-regular file that preceded each real file. An option is given that allows the archive creator to set up reasonable names for these files on the older systems. Also, the normative text suggests that reasonable file access values be used for this ustar header block. Making these header files inaccessible for convenient reading and deleting would not be reasonable. File permissions of 600 or 700 are suggested. The ustar typeflag field was used to accommodate the additional functionality of the new format rather than magic or version because the POSIX.1-1990 standard (and, by reference, the previous version of pax), mandated the behavior of the format-reading utility when it encountered an unknown typeflag, but was silent about the other two fields. Early proposals of the first revision to IEEE Std 1003.1-2001 contained a proposed archive format that was based on compatibility with the standard for tape files (ISO 1001, similar to the format used historically on many mainframes and minicomputers). This format was overly complex and required considerable overhead in volume and header records. Furthermore, the standard developers felt that it would not be acceptable to the community of POSIX developers, so it was later changed to be a format more closely related to historical practice on POSIX systems. The prefix and name split of pathnames in ustar was replaced by the single path extended header record for simplicity. The concept of a global extended header ( typeflag g) was controversial. If this were applied to an archive being recorded on magnetic tape, a few unreadable blocks at the beginning of the tape could be a serious problem; a utility attempting to extract as many files as possible from a damaged archive could lose a large percentage of file header information in this case. However, if the archive were on a reliable medium, such as a CD-ROM, the global extended header offers considerable potential size reductions by eliminating redundant information. Thus, the text warns against using the global method for unreliable media and provides a method for implanting global information in the extended header for each file, rather than in the typeflag g records. No facility for data translation or filtering on a per-file basis is included because the standard developers could not invent an interface that would allow this in an efficient manner. If a filter, such as encryption or compression, is to be applied to all the files, it is more efficient to apply the filter to the entire archive as a single file. The standard developers considered interfaces that would invoke a shell script for each file going into or out of the archive, but the system overhead in this approach was considered to be too high. One such approach would be to have filter= records that give a pathname for an executable. When the program is invoked, the file and archive would be open for standard input/output and all the header fields would be available as environment variables or command-line arguments. The standard developers did discuss such schemes, but they were omitted from IEEE Std 1003.1-2001 due to concerns about excessive overhead. 
Also, the program itself would need to be in the archive if it were to be used portably. There is currently no portable means of identifying the character set(s) used for a file in the file system. Therefore, pax has not been given a mechanism to generate charset records automatically. The only portable means of doing this is for the user to write the archive using the charset= string command line option. This assumes that all of the files in the archive use the same encoding. The "implementation-defined" text is included to allow for a system that can identify the encodings used for each of its files. The table of standards that accompanies the charset record description is acknowledged to be very limited. Only a limited number of character set standards is reasonable for maximal interchange. Any character set is, of course, possible by prior agreement. It was suggested that EBCDIC be listed, but it was omitted because it is not defined by a formal standard. Formal standards, and then only those with reasonably large followings, can be included here, simply as a matter of practicality. The <value>s represent names of officially registered character sets in the format required by the ISO 2375:1985 standard. The normal comma or <blank>-separated list rules are not followed in the case of keyword options to allow ease of argument parsing for getopts. Further information on character encodings is in pax Archive Character Set Encoding/Decoding . The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: VENDOR.keyword where VENDOR is the name of the vendor or organization in all uppercase letters. It is further suggested that the keyword following the period be named differently than any of the standard keywords so that it could be used for future standardization, if appropriate, by omitting the VENDOR prefix. The <length> field in the extended header record was included to make it simpler to step through the records, even if a record contains an unknown format (to a particular pax) with complex interactions of special characters. It also provides a minor integrity checkpoint within the records to aid a program attempting to recover files from a damaged archive. There are no extended header versions of the devmajor and devminor fields because the unspecified format ustar header field should be sufficient. If they are not, vendor-specific extended keywords (such as VENDOR.devmajor) should be used. Device and i-number labeling of files was not adopted from cpio; files are interchanged strictly on a symbolic name basis, as in ustar. Just as with the ustar format descriptions, the new format makes no special arrangements for multi-volume archives. Each of the pax archive types is assumed to be inside a single POSIX file and splitting that file over multiple volumes (diskettes, tape cartridges, and so on), processing their labels, and mounting each in the proper sequence are considered to be implementation details that cannot be described portably. The pax format is intended for interchange, not only for backup on a single (family of) systems. It is not as densely packed as might be possible for backup: * It contains information as coded characters that could be coded in binary. * It identifies extended records with name fields that could be omitted in favor of a fixed-field layout. * It translates names into a portable character set and identifies locale-related information, both of which are probably unnecessary for backup. 
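The stepping property mentioned above can be shown concretely: because every record starts with its own octet count, a reader can walk a header block and simply skip keywords (including VENDOR.keyword ones) that it does not understand. A non-normative Python sketch, with names invented for the example:

    def split_extended_records(blob):
        "Split an extended header block into (keyword, value) pairs."
        offset, records = 0, []
        while offset < len(blob) and blob[offset:offset + 1] not in (b"", b"\0"):
            space = blob.index(b" ", offset)
            length = int(blob[offset:space], 10)
            record = blob[offset:offset + length]
            # Everything between the space and the trailing newline.
            keyword, _, value = record[space - offset + 1:-1].partition(b"=")
            records.append((keyword.decode("utf-8"), value.decode("utf-8")))
            offset += length
        return records

Damaged or unknown records only cost the reader the octets that their own <length> fields claim, which is the integrity checkpoint the text refers to.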
The requirements on restoring from an archive are slightly different from the historical wording, allowing for non-monolithic privilege to bring forward as much as possible. In particular, attributes such as "high performance file" might be broadly but not universally granted while set-user-ID or chown() might be much more restricted. There is no implication in IEEE Std 1003.1-2001 that the security information be honored after it is restored to the file hierarchy, in spite of what might be improperly inferred by the silence on that topic. That is a topic for another standard.

Links are recorded in the fashion described here because a link can be to any file type. It is desirable in general to be able to restore part of an archive selectively and restore all of those files completely. If the data is not associated with each link, it is not possible to do this. However, the data associated with a file can be large, and when selective restoration is not needed, this can be a significant burden. The archive is structured so that files that have no associated data can always be restored by the name of any link, and the user may choose whether data is recorded with each instance of a file that contains data. The format permits mixing of both types of links in a single archive; this can be done for special needs, and pax is expected to interpret such archives on input properly, despite the fact that there is no pax option that would force this mixed case on output. (When linkdata is used, the output must contain the duplicate data, but the implementation is free to include it or omit it when linkdata is not used.)

The time values are included as extended header records for those implementations needing more than the eleven octal digits allowed by the ustar format. Portable file timestamps cannot be negative. If pax encounters a file with a negative timestamp in copy or write mode, it can reject the file, substitute a non-negative timestamp, or generate a non-portable timestamp with a leading '-' . Even though some implementations can support finer file-time granularities than seconds, the normative text requires support only for seconds since the Epoch because the ISO POSIX-1 standard states them that way. The ustar format includes only mtime; the new format adds atime and ctime for symmetry. The atime access time restored to the file system will be affected by the a and e options. The ctime creation time (actually inode modification time) is described with "appropriate privilege" so that it can be ignored when writing to the file system. POSIX does not provide a portable means to change file creation time. Nothing is intended to prevent a non-portable implementation of pax from restoring the value.

The gid, size, and uid extended header records were included to allow expansion beyond the sizes specified in the regular tar header. New file system architectures are emerging that will exhaust the 12-digit size field. There are probably not many systems requiring more than 8 digits for user and group IDs, but the extended header values were included for completeness, allowing overrides for all of the decimal values in the tar header.

The standard developers intended to describe the effective results of pax with regard to file ownerships and permissions; implementations are not restricted in timing or sequencing the restoration of such, provided the results are as specified. Much of the text describing the extended headers refers to use in "write or copy modes".
The copy mode references are due to the normative text: "The effect of the copy shall be as if the copied files were written to an archive file and then subsequently extracted ...". There is certainly no way to test whether pax is actually generating the extended headers in copy mode, but the effects must be as if it had. pax Archive Character Set Encoding/Decoding There is a need to exchange archives of files between systems of different native codesets. Filenames, group names, and user names must be preserved to the fullest extent possible when an archive is read on the receiving platform. Translation of the contents of files is not within the scope of the pax utility. There will also be the need to represent characters that are not available on the receiving platform. These unsupported characters cannot be automatically folded to the local set of characters due to the chance of collisions. This could result in overwriting previous extracted files from the archive or pre-existing files on the system. For these reasons, the codeset used to represent characters within the extended header records of the pax archive must be sufficiently rich to handle all commonly used character sets. The fields requiring translation include, at a minimum, filenames, user names, group names, and link pathnames. Implementations may wish to have localized extended keywords that use non-portable characters. The standard developers considered the following options: * The archive creator specifies the well-defined name of the source codeset. The receiver must then recognize the codeset name and perform the appropriate translations to the destination codeset. * The archive creator includes within the archive the character mapping table for the source codeset used to encode extended header records. The receiver must then read the character mapping table and perform the appropriate translations to the destination codeset. * The archive creator translates the extended header records in the source codeset into a canonical form. The receiver must then perform the appropriate translations to the destination codeset. The approach that incorporates the name of the source codeset poses the problem of codeset name registration, and makes the archive useless to pax archive decoders that do not recognize that codeset. Because parts of an archive may be corrupted, the standard developers felt that including the character map of the source codeset was too fragile. The loss of this one key component could result in making the entire archive useless. (The difference between this and the global extended header decision was that the latter has a workaround-duplicating extended header records on unreliable media-but this would be too burdensome for large character set maps.) Both of the above approaches also put an undue burden on the pax archive receiver to handle the cross-product of all source and destination codesets. To simplify the translation from the source codeset to the canonical form and from the canonical form to the destination codeset, the standard developers decided that the internal representation should be a stateless encoding. A stateless encoding is one where each codepoint has the same meaning, without regard to the decoder being in a specific state. An example of a stateful encoding would be the Japanese Shift-JIS; an example of a stateless encoding would be the ISO/IEC 646:1991 standard (equivalent to 7-bit ASCII). 
For these reasons, the standard developers decided to adopt a canonical format for the representation of file information strings. The obvious, well-endorsed candidate is the ISO/IEC 10646-1:2000 standard (based in part on Unicode), which can be used to represent the characters of virtually all standardized character sets. The standard developers initially agreed upon using UCS2 (16-bit Unicode) as the internal representation. This repertoire of characters provides a sufficiently rich set to represent all commonly-used codesets. However, the standard developers found that the 16-bit Unicode representation had some problems. It forced the issue of standardizing byte ordering. The 2-byte length of each character made the extended header records twice as long for the case of strings coded entirely from historical 7-bit ASCII. For these reasons, the standard developers chose the UTF-8 defined in the ISO/IEC 10646-1:2000 standard. This multi-byte representation encodes UCS2 or UCS4 characters reliably and deterministically, eliminating the need for a canonical byte ordering. In addition, NUL octets and other characters possibly confusing to POSIX file systems do not appear, except to represent themselves. It was realized that certain national codesets take up more space after the encoding, due to their placement within the UCS range; it was felt that the usefulness of the encoding of the names outweighs the disadvantage of size increase for file, user, and group names. The encoding of UTF-8 is as follows:

    UCS4 Hex Encoding    UTF-8 Binary Encoding
    00000000-0000007F    0xxxxxxx
    00000080-000007FF    110xxxxx 10xxxxxx
    00000800-0000FFFF    1110xxxx 10xxxxxx 10xxxxxx
    00010000-001FFFFF    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    00200000-03FFFFFF    111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    04000000-7FFFFFFF    1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx

where each 'x' represents a bit value from the character being translated.

ustar Interchange Format

The description of the ustar format reflects numerous enhancements over pre-1988 versions of the historical tar utility. The goal of these changes was not only to provide the functional enhancements desired, but also to retain compatibility between new and old versions. This compatibility has been retained. Archives written using the old archive format are compatible with the new format. Implementors should be aware that the previous file format did not include a mechanism to archive directory type files. For this reason, the convention of using a filename ending with slash was adopted to specify a directory on the archive. The total size of the name and prefix fields have been set to meet the minimum requirements for {PATH_MAX}. If a pathname will fit within the name field, it is recommended that the pathname be stored there without the use of the prefix field. Although the name field is known to be too small to contain {PATH_MAX} characters, the value was not changed in this version of the archive file format to retain backwards-compatibility, and instead the prefix was introduced. Also, because of the earlier version of the format, there is no way to remove the restriction on the linkname field being limited in size to just that of the name field. The size field is required to be meaningful in all implementation extensions, although it could be zero. This is required so that the data blocks can always be properly counted.
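Returning to the UTF-8 table shown above under pax Archive Character Set Encoding/Decoding, the bit patterns can be checked mechanically. The following non-normative Python sketch encodes a single UCS4 code point using exactly those prefixes and compares one case against Python's own codec (the function name is invented for the example):

    def ucs_to_utf8(cp):
        "Encode one UCS4 code point per the UTF-8 table above."
        if cp < 0x80:
            return bytes(bytearray([cp]))
        for prefix, nbytes, limit in ((0xC0, 2, 0x800), (0xE0, 3, 0x10000),
                                      (0xF0, 4, 0x200000), (0xF8, 5, 0x4000000),
                                      (0xFC, 6, 0x80000000)):
            if cp < limit:
                out = []
                for _ in range(nbytes - 1):      # continuation octets 10xxxxxx
                    out.append(0x80 | (cp & 0x3F))
                    cp >>= 6
                out.append(prefix | cp)          # leading octet from the table
                return bytes(bytearray(reversed(out)))
        raise ValueError("out of UCS4 range")

    # Spot check against the built-in codec for a 3-octet case (U+20AC).
    assert ucs_to_utf8(0x20AC) == u"\u20ac".encode("utf-8")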
It is suggested that if device special files need to be represented that cannot be represented in the standard format, that one of the extension types ( A- Z) be used, and that the additional information for the special file be represented as data and be reflected in the size field. Attempting to restore a special file type, where it is converted to ordinary data and conflicts with an existing filename, need not be specially detected by the utility. If run as an ordinary user, pax should not be able to overwrite the entries in, for example, /dev in any case (whether the file is converted to another type or not). If run as a privileged user, it should be able to do so, and it would be considered a bug if it did not. The same is true of ordinary data files and similarly named special files; it is impossible to anticipate the needs of the user (who could really intend to overwrite the file), so the behavior should be predictable (and thus regular) and rely on the protection system as required. The value 7 in the typeflag field is intended to define how contiguous files can be stored in a ustar archive. IEEE Std 1003.1-2001 does not require the contiguous file extension, but does define a standard way of archiving such files so that all conforming systems can interpret these file types in a meaningful and consistent manner. On a system that does not support extended file types, the pax utility should do the best it can with the file and go on to the next. The file protection modes are those conventionally used by the ls utility. This is extended beyond the usage in the ISO POSIX-2 standard to support the "shared text" or "sticky" bit. It is intended that the conformance document should not document anything beyond the existence of and support of such a mode. Further extensions are expected to these bits, particularly with overloading the set-user-ID and set-group-ID flags. cpio Interchange Format The reference to appropriate privilege in the cpio format refers to an error on standard output; the ustar format does not make comparable statements. The model for this format was the historical System V cpio data interchange format. This model documents the portable version of the cpio format and not the binary version. It has the flexibility to transfer data of any type described within IEEE Std 1003.1-2001, yet is extensible to transfer data types specific to extensions beyond IEEE Std 1003.1-2001 (for example, contiguous files). Because it describes existing practice, there is no question of maintaining upwards-compatibility. cpio Header There has been some concern that the size of the c_ino field of the header is too small to handle those systems that have very large inode numbers. However, the c_ino field in the header is used strictly as a hard-link resolution mechanism for archives. It is not necessarily the same value as the inode number of the file in the location from which that file is extracted. The name c_magic is based on historical usage. cpio Filename For most historical implementations of the cpio utility, {PATH_MAX} octets can be used to describe the pathname without the addition of any other header fields (the NUL character would be included in this count). {PATH_MAX} is the minimum value for pathname size, documented as 256 bytes. However, an implementation may use c_namesize to determine the exact length of the pathname. With the current description of the <cpio.h> header, this pathname size can be as large as a number that is described in six octal digits. 
Two values are documented under the c_mode field values to provide for extensibility for known file types:

0110000    Reserved for contiguous files. The implementation may treat the rest of the information for this archive like a regular file. If this file type is undefined, the implementation may create the file as a regular file.

This provides for extensibility of the cpio format while allowing for the ability to read old archives. Files of an unknown type may be read as "regular files" on some implementations. On a system that does not support extended file types, the pax utility should do the best it can with the file and go on to the next.

FUTURE DIRECTIONS

None.

SEE ALSO

Shell Command Language, cp, ed, getopts, ls, printf(), the Base Definitions volume of IEEE Std 1003.1-2001, <cpio.h>, the System Interfaces volume of IEEE Std 1003.1-2001, chown(), creat(), mkdir(), mkfifo(), stat(), utime(), write()

COPYRIGHT

Portions of this text are reprinted and reproduced in electronic form from IEEE Std 1003.1, 2003 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2003 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between this version and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html .

doclifter-2.11/tests/grap.chk0000664000175000017500000015422312152465736014362 0ustar esresr

GRAP 1

grap Kernighan and Bentley's language for typesetting graphs

grap --d defines_file --D --l --M include path --R --r --v --u --C --c --h filename

DESCRIPTION

grap is an implementation of Kernighan and Bentley's language for typesetting graphs, as described in “Grap - A Language for Typesetting Graphs, Tutorial and User Manual,” by Jon L. Bentley and Brian W. Kernighan, revised May 1991, which is the primary source for information on how to use grap. As of this writing, it is available electronically at http://www.kohala.com/start/troff/cstr114.ps. Additional documentation and examples, packaged with grap, may have been installed locally as well. If available, paths to them can be displayed using grap or grap (or grap / grap ). This version is a black box implementation of grap and some inconsistencies are to be expected. The remainder of this manual page will briefly outline the grap language as implemented here. grap is a pic(1) pre-processor. It takes commands embedded in a troff(1) source file which are surrounded by .G1 and .G2 macros, and rewrites them into pic commands to display the graph. Other lines are copied. Output is always to the standard output, which is usually redirected. Input is from the given filenames, which are read in order. A filename of - is the standard input. If no filenames are given, input is read from the standard input. Because grap is a pic preprocessor, and GNU pic will output TeX, it is possible to use grap with TeX.

The option specifies a file of macro definitions to be read at startup, and defaults to /usr/local/share/grap/grap.defines . The option inhibits the reading of any initial macros file (the flag is a synonym for , though I do not remember why). The defines file can also be given using the GRAP_DEFINES environment variable. (See below). prints the version information on the standard output and exits. is a synonym for .
makes labels unaligned by default. This version of grap uses new features of GNU pic to align the left and right labels with the axes, that is that the left and right labels run at right angles to the text of the paper. This may be useful in porting old grap programs. makes plot strings unclipped by default. Some versions of grap allow users to place a string anywhere in the coordinate space, rather than only in the frame. By default this version of grap does not plot any string centered outside the frame. allows strings to be placed anywhere. See also the clipped and unclipped string modifiers described in the plot statement. is followed by a colon-separated list of directories used to search for relative pathnames included via copy. The path is also used to locate the defines file, so if the changes the defines file name to a relative name, it will be searched for in the path given by . The search path always includes the current directory, and by default that directory is searched last. All numbers used internally by grap are double precision floating point values. Sometimes using floating point numbers has unintended consequences. To help avoid these problems, grap can use two thresholds for comparison of floating point numbers, set by or . The flag sets coarse comparison mode, which is suitable for most applications. If you are plotting small values – less than 1e-6 or so – consider using which uses very fine comparisons between numbers. You may also want to rescale your plotted values to be larger in magnitude. The coarse comarisons are used by default. To be precise, the value by which two numbers must differ for grap to consider them not equal is called the comparison limit and the smallest non-zero number is called the minimum value. The values a given version of grap uses for these are included in the output of or . All grap commands are included between .G1 and .G2 macros, which are consumed by grap The output contains pic between .PS and .PE macros. Any arguments to the .G1 macro in the input are arguments to the .PS macro in the output, so graphs can be scaled just like pic diagrams. If is given, any macro beginning with .G1 or .G2 is treated as a .G1 or .G2 macro, for compatibility with old versions of troff. Using also forces pure troff syntax on embedded font change commands when strings have the size attribute, and all strings to be unclipped. The flag prints a brief help message and exits. is a synonym for . It is possible for someone to cause grap to fail by passing a bad format string and data to the sprintf command. If grap is integrated as part of the printing system, this could conceivably provided a path to breaching security on the machine. If you choose to use grap as part of a printing system run by the super-user, you should disable sprintf commands. This can be done by calling grap with the flag, setting the GRAP_SAFER environment variable, or compiling with the GRAP_SAFER preprocessor symbol defined. (The GNU configure script included with grap will define that preprocessor symbol if the option is given.) The grap commands are sketched below. Refer to Kernighan and Bentley's paper for the details. New versions of groff1 will invoke grap if is given. Commands Commands are separated from one another by newlines or semicolons (;). frame line_description [Bk -words ht height | wid width Ek] [Bk -words [Sm off (top | bottom | left | Sm on right) line_description Ek] ... 
] frame [Bk -words ht height | wid width Ek] line_description [Bk -words [Sm off (top | bottom | left | Sm on right) line_description Ek] ... ] This describes how the axes for the graph are drawn. A line_description is a pic line description, e.g., dashed 0.5, or the literal solid. It may also include a color keyword followed by the color to draw the string in double quotes. Any color understood by the underlying groff system can be used. Color can only be used under GNU pic, and is not available in compatibility mode. Similarly, for pic implementations that understand thickness, that attribute may be used with a real valued parameter. Thickness is not available in compatibility mode. If the first line_description is given, the frame is drawn with that style. The default is solid. The height and width of the frame can also be specified in inches. The default line style can be over-ridden for sides of the frame by specifying additional parameters to frame. If no plotting commands have been given before the frame command is issued, the frame will be output at that point in the plotting stream relative to embedded troff or pic commands. Otherwise the frame is output before the first plotted object (even invisible ones). ht and wid are in inches by default, but can be any groff unit. If omitted, the dimensions are 2 inches high by 3 inches wide. coord name x expr, expr y expr, expr [log x | log y | log log] The coord command specifies a new coordinate system or sets limits on the default system. It defines the largest and smallest values that can be plotted, and therefore the scale of the data in the frame. The limits for the x and y coordinate systems can be given separately. If a name is given, that coordinate system is defined, if not the default system is modified. A coordinate system created by one coord command may be modified by subsequent coord commands. A grap program may declare a coordinate space using coord, copy a file of data through a macro that plots the data and finds its maxima and minima, and then define the size of the coordinate system with a second coord statement. This command also determines if a scale is plotted logarithmically. log log means the same thing as log x log y. draw line_name line_description plot_string The draw command defines the style with which a given line will be plotted. If line_name is given, the style is associated with that name, otherwise the default style is set. line_description is a pic line description, and the optional plot_string is a string to be centered at each point. The default line description is invis, and the default plotting string is a centered bullet, so by default each point is a filled circle, and they are unconnected. If points are being connected, each draw command ends any current line and begins a new one. When defining a line style, that is the first draw command for a given line name, specifying no plot string means that there are to be no plot strings. Omitting the plot string on subsequent draw commands addressing the same named line means not to change the plot string. If a line has been defined with a plot string, and the format is changed by a subsequent draw statement, the plot string can be removed by specifying "" in the draw statement. The plot string can have its format changed through several string_modifiers. String_modifiers are described in the description of the plot command. The standard defines file includes several macros useful as plot strings, including bullet, square, and delta. 
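A minimal sketch showing the frame, coord, and draw statements working together (the data points and the line name "response" are invented for illustration; bullet comes from the standard defines file, and the next statement used to add points is described below):

.G1
frame ht 2 wid 3 solid
coord x 0, 10 y 0, 100
draw response solid bullet
next response at 1, 10
next response at 5, 40
next response at 9, 90
.G2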
new is a synonym for draw. next line_name at coordinates_name expr, expr line_description The next command plots the given point using the line style given by line_name, or the default if none is given. If line_name is given, it should have been defined by an earlier draw command; if not, a new line style with that name is created, initialized the same way as the default style. The two expressions give the point's x and y values, relative to the optional coordinate system. That system should have been defined by an earlier coord command, if not, grap will exit. If the optional line_description is given, it overrides the style's default line description. You cannot over-ride the plotting string. To use a different plotting string use the plot command. The coordinates may optionally be enclosed in parentheses: (expr, expr) quoted_string string_modifiers [, quoted_string [string_modifiers] ] ... at coordinates_name expr, expr plot expr format_string at coordinates_name expr, expr These commands both plot a string at the given point. In the first case the literal strings are stacked above each other. The string_modifiers include the pic justification modifiers (ljust, rjust, above, and below), and absolute and relative size modifiers. See the pic documentation for the description of the justification modifiers. grap also supports the aligned and unaligned modifiers which are briefly noted in the description of the label command. The standard defines file includes several macros useful as plot strings, including bullet, square, and delta. Strings placed by either format of the plot command are restricted to being within the frame. This can be overridden by using the unclipped attribute, which allows a string to be plotted in or out of the frame. The and flags set unclipped on all strings, and to prevent a string from being plotted outside the frame when those flags are active, the clipped attribute can be used to restore clipping behavior. Though clipped or unclipped can be applied to any string, it only has meaning for plot statements. size expr sets the string size to expr points. If expr is preceded by a + or -, the size is increased or decreased by that many points. If color and a color name in double quotes appears, the string will be rendered in that color under a version of GNU troff that supports color. Color is not available in compatibility mode. In the second version, the expr is converted to a string and placed on the graph. format_string is a printf3 format string. Only formatting escapes for printing floating point numbers make sense. The format string is only respected if the sprintf command is also active. See the description of sprintf for the various ways to disable it. Plot and sprintf respond differently when grap is running safely. Sprintf ignores any arguments, passing the format string through without substitution. plot ignores the format string completely, plotting expr using the "%g" format. Points are specified the same way as for next commands, with the same consequences for undefined coordinate systems. The second form of this command is largely redundant, because the first form can be used with a grap sprintf expression (See Expressions).
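For instance, continuing the sketch above (the coordinates are invented and assume the default coordinate system), the two plot forms look like:

"peak value" ljust size -2 at 8.5, 97
plot 3.14159 "%.2f" at 2, 50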
ticks (left | right | top | bottom) [(in|out) expr] [on|auto coord_name] ticks (left | right | top | bottom) (in|out) expr [up expr | down expr | left expr | right expr] at coord_name expr format_string [[, expr [format_string] ] ] ticks (left | right | top | bottom) (in|out) expr [up expr | down expr | left expr | right expr] from [coord_name] start_expr to end_expr [by [+ | - | * | /] by_expr ] [format_string] ticks [left | right | top | bottom] off This command controls the placement of ticks on the frame. By default, ticks are automatically generated on the left and bottom sides of the frame. The first version of this command turns on the automatic tick generation for a given side. The in or out parameter controls the direction and length of the ticks. If a coord_name is specified, the ticks are automatically generated using that coordinate system. If no system is specified, the default coordinate system is used. As with next and plot, the coordinate system must be declared before the ticks statement that references it. This syntax for requesting automatically generated ticks is an extension, and will not port to older grap implementations. The second version of the ticks command overrides the automatic placement of the ticks by specifying a list of coordinates at which to place the ticks. If the ticks are not defined with respect to the default coordinate system, the coord_name parameter must be given. For each tick a printf3 style format string can be given. The format_string defaults to "%g". The format string can also take string modifiers as described in the plot command. To place ticks with no labels, specify format_string as "". If sprintf is disabled, ticks behaves as plot with respect to the format string. The labels on the ticks may be shifted by specifying a direction and the distance in inches to offset the label. That is the optional direction and expression immediately preceding the at. The third format of the ticks command over-rides the default tick generation with a set of ticks at regular intervals. The syntax is reminiscent of programming language for loops. Ticks are placed starting at start_expr and ending at end_expr, one unit apart. If the by clause is specified, ticks are by_expr units apart. If an operator appears before by_expr, each tick is operated on by that operator instead of +. For example, ticks left out from 2 to 32 by *2 will put ticks at 2, 4, 8, 16, and 32. If format_string is specified, all ticks are formatted using it. The parameters preceding the from act as described above. The at and for forms of the ticks command may both be issued on the same side of a frame. For example: ticks left out from 2 to 32 by *2 ticks left in 3, 5, 7 will put ticks on the left side of the frame pointing out at 2, 4, 8, 16, and 32 and in at 3, 5, and 7. The final form of ticks turns off ticks on a given side. If no side is given, the ticks for all sides are cancelled. tick is a synonym for ticks.
grid (left | right | top | bottom) ticks off line_description [up expr | down expr | left expr | right expr] [Sm off on | auto Sm on coord_name] grid (left | right | top | bottom) ticks off line_description [up expr | down expr | left expr | right expr] at coord_name expr format_string [[, expr [format_string] ] ] grid (left | right | top | bottom) ticks off line_description [up expr | down expr | left expr | right expr] from [coord_name] start_expr to end_expr [by Sm off [+ | - | * | / Sm on] by_expr ] [format_string] The grid command is similar to the ticks command except that grid specifies the placement of lines in the frame. The syntax is similar to ticks as well. By specifying ticks off in the command, no ticks are drawn on that side of the frame. If ticks appear on a side by default, or have been declared by an earlier ticks command, grid does not cancel them unless ticks off is specified. Instead of a direction for ticks, grid allows the user to pick a line description for the grid lines. The usual pic line descriptions are allowed. Grids are labelled by default. To omit labels, specify the format string as "". If sprintf is disabled, grid behaves as plot with respect to the format string. label (left | right | top | bottom) quoted_string string_modifiers [, quoted_string [string_modifiers] ] ... [up expr | down expr | left expr | right expr] The label command places a label on the given axis. It is possible to specify several labels, which will be stacked over each other as in pic. The final argument, if present, specifies how many inches the label is shifted from the axis. By default the labels on the left and right labels run parallel to the frame. You can cancel this by specifying unaligned as a string_modifier. circle at coordinate_name expr, expr radius expr linedesc This draws an circle at the point indicated. By default, the circle is small, 0.025 inches. This can be over-ridden by specifying a radius. The coordinates of the point are relative to the named coordinate system, or the default system if none is specified. This command has been extended to take a line description, e.g., dotted. It also accepts the filling extensions described below in the bar command. It will also accept a color keyword that gives the color of the outline of the circle in double quotes and a fillcolor command that sets the color to fill the circle with similarly. Colors are only available when compatibility mode is off, and using a version of GNU pic that supports color. line line_description from coordinate_name expr, expr to coordinate_name expr, expr line_description arrow line_description from coordinate_name expr, expr to coordinate_name expr, expr line_description This draws a line or arrow from the first point to the second using the given style. The default line style is solid. The line_description can be given either before the from or after the to clause. If both are given the second is used. It is possible to specify one point in one coordinate system and one in another, note that if both points are in a named coordinate system (even if they are in the same named coordinate system), both points must have coordinate_name given. copy "Ar filename" until "Ar string" thru macro The copy command imports data from another file into the current graph. The form with only a filename given is a simple file inclusion; the included file is simply read into the input stream and can contain arbitrary grap commands. The more common case is that it is a number list; see Number Lists below. 
The second form takes lines from the file, splits them into words delimited by one or more spaces, and calls the given macro with those words as parameters. The macro may either be defined here, or be a macro defined earlier. See Macros for more information on macros. The filename may be omitted if the until clause is present. If so the current file is treated as the input file until string is encountered at the beginning of the line. copy is one of the workhorses of grap Check out the paper and /usr/local/share/examples/grap for more details. Confirm the location of the examples directory using the flag. print (expr|string) Prints its argument to the standard error. sh block This passes block to sh1. Unlike K&B grap no macro or variable expansion is done. I believe that this is also true for GNU pic version 1.10. See the Macros section for information on defining blocks. pic pic_statement This issues the given pic statements in the enclosing .PS and .PE at the point where the command is issued. Statements that begin with a period are considered to be troffstatements and are output in the enclosing .PS and .PE at the point where the command appears. For the purposes of relative placement of pic or troff commands, the frame is output immediately before the first plotted object, or the frame statement, if any. If the user specifies pic or troff commands and neither any plotable object nor a frame command, the commands will not be output. graph Name pic_commands This command is used to position graphs with respect to each other. The current graph is given the pic name Name (names used by pic begin with capital letters). Any pic commands following the graph are used to position the next graph. The frame of the graph is available for use with pic name Frame. The following places a second graph below the first: graph Linear [ graph description ] graph Exponential with .Frame.n at \ Linear.Frame.s - (0, .05) [ graph description ] name = expr This assigns expr to the variable name. grap has only numeric (double) variables. Assignment creates a variable if it does not exist. Variables persist across graphs. Assignments can cascade; a = b = 35 assigns 35 to a and b. bar ()up | right coordinates_name offset ht height wid width base base_offset line_description bar coordinates_name expr, expr, coordinates_name expr, expr, line_description The bar command facilitates drawing bar graphs. The first form of the command describes the bar somewhat generally and has grap place it. The bar may extend up or to the right, is centered on offset and extends up or right height units (in the given coordinate system). For example bar up 3 ht 2 draws a 2 unit high bar sitting on the x axis, centered on x=3. By default bars are 1 unit wide, but this can be changed with the wid keyword. By default bars sit on the base axis, i.e., bars directed up will extend from y=0. That may be overridden by the base keyword. (The bar described above has corners (2.5, 0) and (3.5, 2).) The line description has been extended to include a fill expr keyword that specifies the shading inside the bar. Bars may be drawn in any line style. They support the color and fillcolor keywords described under circle. The second form of the command draws a box with the two points as corners. This can be used to draw boxes highlighting certain data as well as bar graphs. Note that filled bars will cover data drawn under them. Control Flow if expr then block else block The if statement provides simple conditional execution. 
If expr is non-zero, the block after the then statement is executed. If not, the block after the else is executed, if present. See Macros for the definition of blocks. Early versions of this implementation of grap treated the blocks as macros that were defined and expanded in place. This led to unnecessary confusion because explicit separators were sometimes called for. Now, grap inserts a separator (;) after the last character in block, so constructs like if (x == 3) { y = y + 1 } x = x + 1 behave as expected. A separator is also appended to the end of a for block. for name from from_expr to to_expr [by +|-|*|/ by_expr] do block This command executes block iteratively. The variable name is set to from_expr and incremented by by_expr until it exceeds to_expr. The iteration has the semantics defined in the ticks command. The definition of block is discussed in Macros. See also the note about implicit separators in the description of the if command. An = can be used in place of from. Expressions grap supports most standard arithmetic operators: + - / * ^. The caret (^) is exponentiation. In an if statement, grap also supports the C logical operators ==, !=, &&, || and unary !. Also in an if, == and != are overloaded for the comparison of quoted strings. Parentheses are used for grouping. Assignment is not allowed in an expression in any context, except for simple cascading of assignments. a = b = 35 works as expected; a = 3.5 * (b = 10) does not execute. grap supports the following functions that take one argument: log, exp, int, sin, cos, sqrt, rand. The logarithms are base 10 and the trigonometric functions are in radians. eexp returns Euler's number to the given power and ln returns the natural logarithm. The natural log and exponentiation functions are extensions and are probably not available in other grap implementations. rand returns a random number uniformly distributed on [0,1). The following two-argument functions are supported: atan2, min, max. atan2 works just like atan23. The random number generator can be seeded by calling srand with a single parameter (converted internally to an integer). Because its return value is of no use, you must use srand as a separate statement; it is not part of a valid expression. srand is not portable. The getpid function takes no arguments and returns the process id. This may be used to seed the random number generator, but do not expect cryptographically random values to result. Other than string comparison, no expressions can use strings. One string-valued function exists: sprintf(format [, expr, expr ...]). It operates like sprintf3, except that it returns the value. It can be used anywhere a quoted string is used. If grap is run with , the environment variable GRAP_SAFER is defined, or grap has been compiled for safer operation, the sprintf command will return the format string. This mode of operation is intended to be used only if grap is being used as part of a super-user enabled print system. Macros grap has a simple but powerful macro facility. Macros are defined using the define command: define name block undefine name Every occurrence of name in the program text is replaced by the contents of block. block is defined by a series of statements in nested { }'s, or a series of statements surrounded by the same letter. An example of the latter is define foo X coord x 1,3 X Each time foo appears in the text, it will be replaced by coord x 1,3. Macros are literal, and can contain newlines.
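As a small worked example of the for statement and the built-in functions above (the ranges are arbitrary and not taken from the original manual), the following plots one cycle of a sine curve using the default line style:

.G1
coord x 0, 6.3 y -1, 1
for t from 0 to 6.3 by 0.1 do { next at t, sin(t) }
.G2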
If a macro does not span multiple lines, it should end in a semicolon to avoid parsing errors. Macros can take parameters, too. If a macro call is followed by a parenthesized, comma-separated list the values starting with $1 will be replaced in the macro with the elements of the list. A $ not followed by a digit is left unchanged. This parsing is very rudimentary; no nesting or parentheses or escaping of commas is allowed. Also, there is no way to say argument 1 followed by a digit (${1}0 in sh(1)). The following will draw a line with slope 1. define foo { next at $1, $2 } for i from 1 to 5 { foo(i,i) } Macros persist across graphs. The file /usr/local/share/grap/grap.defines contains simple macros for plotting common characters. The undefine command deletes a macro. See the directory /usr/local/share/examples/grap for more examples of macros. Confirm the location of the examples directory using the flag. Number Lists A whitespace-separated list of numbers is treated specially. The list is taken to be points to be plotted using the default line style on the default coordinate system. If more than two numbers are given, the extra numbers are taken to be additional y values to plot at the first x value. Number lists in DWB grap can be comma-separated, and this grap supports that as well. More precisely, numbers in number lists can be separated by either whitespace, commas, or both. 1 2 3 4 5 6 Will plot points using the default line style at (1,2), (1,3),(4,5) and (4,6). A simple way to plot a set of numbers in a file named ./data is: .G1 copy "./data" .G2 Pic Macros grap defines pic macros that can be used in embedded pic code to place elements in the graph. The macros are x_gg, y_gg, and xy_gg. These macros define pic distances that correspond to the given argument. They can be used to size boxes or to plot pic constructs on the graph. To place a given construct on the graph, you should add Frame.Origin to it. Other coordinate spaces can be used by replacing gg with the name of the coordinate space. A coordinate space named gg cannot be reliably accessed by these macros. The macros are emitted immediately before the frame is drawn. DWB grap may use these as part of its implementation. This grap provides them only for compatibility. Note that these are very simple macros, and may not do what you expect under complex conditions. ENVIRONMENT VARIABLES If the environment variable GRAP_DEFINES is defined, grap will look for its defines file there. If that value is a relative path name the path specified in the option will be searched for it. GRAP_DEFINES overrides the compiled in location of the defines file, but may be overridden by the or flags. If GRAP_SAFER is set, sprintf is disabled to prevent forcing grap to core dump or smash the stack. FILES /usr/local/share/grap/grap.defines SEE ALSO atan23, groff1, pic1, printf3, sh1, sprintf3, troff1 If documentation and examples have been installed, grap or grap will display the locations. BUGS There are several small incompatibilities with K&R grap They include the sh command not expanding variables and macros, and a more strict adherence to parameter order in the internal commands. Although much improved, the error reporting code can still be confused. Notably, an error in a macro is not detected until the macro is used, and it produces unusual output in the error message. Iterating many times over a macro with no newlines can run grap out of memory. 
AUTHOR This implementation was done by phrase Ted Faber Ao faber@lunabase.org Ac Ns role='author'. phrase Bruce Lilly Ao blilly@erols.com Ac role='author' contributed many bug fixes, including a considerable revamp of the error reporting code. If you can actually find an error in your grap code, you can probably thank him. grap was designed and specified by phrase Brian Kernighan role='author' and phrase Jon Bentley role='author'. doclifter-2.11/tests/xoxc.chk0000664000175000017500000000370612152465736014411 0ustar esresr MORE(1) more file perusal filter for crt viewing DESCRIPTION This is a partial manual page stripped down to exhibit translation of the Xo and Xc macros. h or ? Help: display a summary of these commands. If you forget all the other commands, remember this one. SPACE Display next k lines of text. Defaults to current screen size. z Display next k lines of text. Defaults to current screen size. Argument becomes new default. RETURN Display next k lines of text. Defaults to 1. Argument becomes new default. d or ^D Scroll k lines. Default is current scroll size, initially 11. Argument becomes new default. q or Q or INTERRUPT Exit. doclifter-2.11/tests/capabilities.chk0000664000175000017500000014545412152465736016070 0ustar esresr 2011-10-04 CAPABILITIES 7 2011-10-04 Linux Linux Programmer's Manual capabilities overview of Linux capabilities DESCRIPTION For the purpose of performing permission checks, traditional UNIX implementations distinguish two categories of processes: privileged processes (whose effective user ID is 0, referred to as superuser or root), and unprivileged processes (whose effective UID is nonzero). Privileged processes bypass all kernel permission checks, while unprivileged processes are subject to full permission checking based on the process's credentials (usually: effective UID, effective GID, and supplementary group list). Starting with kernel 2.2, Linux divides the privileges traditionally associated with superuser into distinct units, known as capabilities, which can be independently enabled and disabled. Capabilities are a per-thread attribute. Capabilities List The following list shows the capabilities implemented on Linux, and the operations or behaviors that each capability permits: CAP_AUDIT_CONTROL (since Linux 2.6.11) Enable and disable kernel auditing; change auditing filter rules; retrieve auditing status and filtering rules. CAP_AUDIT_WRITE (since Linux 2.6.11) Write records to kernel auditing log. CAP_CHOWN Make arbitrary changes to file UIDs and GIDs (see chown2). CAP_DAC_OVERRIDE Bypass file read, write, and execute permission checks. (DAC is an abbreviation of "discretionary access control".) CAP_DAC_READ_SEARCH Bypass file read permission checks and directory read and execute permission checks. CAP_FOWNER Bypass permission checks on operations that normally require the file system UID of the process to match the UID of the file (e.g., chmod2, utime2), excluding those operations covered by CAP_DAC_OVERRIDE and CAP_DAC_READ_SEARCH; set extended file attributes (see chattr1) on arbitrary files; set Access Control Lists (ACLs) on arbitrary files; ignore directory sticky bit on file deletion; specify O_NOATIME for arbitrary files in open2 and fcntl2. CAP_FSETID Don't clear set-user-ID and set-group-ID permission bits when a file is modified; set the set-group-ID bit for a file whose GID does not match the file system or any of the supplementary GIDs of the calling process. CAP_IPC_LOCK Lock memory (mlock2, mlockall2, mmap2, shmctl2). 
CAP_IPC_OWNER Bypass permission checks for operations on System V IPC objects. CAP_KILL Bypass permission checks for sending signals (see kill2). This includes use of the ioctl2 KDSIGACCEPT operation. CAP_LEASE (since Linux 2.4) Establish leases on arbitrary files (see fcntl2). CAP_LINUX_IMMUTABLE Set the FS_APPEND_FL and FS_IMMUTABLE_FL i-node flags (see chattr1). CAP_MAC_ADMIN (since Linux 2.6.25) Override Mandatory Access Control (MAC). Implemented for the Smack Linux Security Module (LSM). CAP_MAC_OVERRIDE (since Linux 2.6.25) Allow MAC configuration or state changes. Implemented for the Smack LSM. CAP_MKNOD (since Linux 2.4) Create special files using mknod2. CAP_NET_ADMIN Perform various network-related operations (e.g., setting privileged socket options, enabling multicasting, interface configuration, modifying routing tables). CAP_NET_BIND_SERVICE Bind a socket to Internet domain privileged ports (port numbers less than 1024). CAP_NET_BROADCAST (Unused) Make socket broadcasts, and listen to multicasts. CAP_NET_RAW Use RAW and PACKET sockets. CAP_SETGID Make arbitrary manipulations of process GIDs and supplementary GID list; forge GID when passing socket credentials via UNIX domain sockets. CAP_SETFCAP (since Linux 2.6.24) Set file capabilities. CAP_SETPCAP If file capabilities are not supported: grant or remove any capability in the caller's permitted capability set to or from any other process. (This property of CAP_SETPCAP is not available when the kernel is configured to support file capabilities, since CAP_SETPCAP has entirely different semantics for such kernels.) If file capabilities are supported: add any capability from the calling thread's bounding set to its inheritable set; drop capabilities from the bounding set (via prctl2 PR_CAPBSET_DROP); make changes to the securebits flags. CAP_SETUID Make arbitrary manipulations of process UIDs (setuid2, setreuid2, setresuid2, setfsuid2); make forged UID when passing socket credentials via UNIX domain sockets. CAP_SYS_ADMIN Perform a range of system administration operations including: quotactl2, mount2, umount2, swapon2, swapoff2, sethostname2, and setdomainname2; perform privileged syslog2 operations (since Linux 2.6.37, CAP_SYSLOG should be used to permit such operations); perform IPC_SET and IPC_RMID operations on arbitrary System V IPC objects; perform operations on trusted and security Extended Attributes (see attr5); use lookup_dcookie2; use ioprio_set2 to assign IOPRIO_CLASS_RT and (before Linux 2.6.25) IOPRIO_CLASS_IDLE I/O scheduling classes; forge UID when passing socket credentials; exceed /proc/sys/fs/file-max, the system-wide limit on the number of open files, in system calls that open files (e.g., accept2, execve2, open2, pipe2); employ CLONE_NEWNS flag with clone2 and unshare2; call setns2; perform KEYCTL_CHOWN and KEYCTL_SETPERM keyctl2 operations; perform madvise2 MADV_HWPOISON operation. CAP_SYS_BOOT Use reboot2 and kexec_load2. CAP_SYS_CHROOT Use chroot2. CAP_SYS_MODULE Load and unload kernel modules (see init_module2 and delete_module2); in kernels before 2.6.25: drop capabilities from the system-wide capability bounding set. 
CAP_SYS_NICE Raise process nice value (nice2, setpriority2) and change the nice value for arbitrary processes; set real-time scheduling policies for calling process, and set scheduling policies and priorities for arbitrary processes (sched_setscheduler2, sched_setparam2); set CPU affinity for arbitrary processes (sched_setaffinity2); set I/O scheduling class and priority for arbitrary processes (ioprio_set2); apply migrate_pages2 to arbitrary processes and allow processes to be migrated to arbitrary nodes; apply move_pages2 to arbitrary processes; use the MPOL_MF_MOVE_ALL flag with mbind2 and move_pages2. CAP_SYS_PACCT Use acct2. CAP_SYS_PTRACE Trace arbitrary processes using ptrace2; apply get_robust_list2 to arbitrary processes. CAP_SYS_RAWIO Perform I/O port operations (iopl2 and ioperm2); access /proc/kcore. CAP_SYS_RESOURCE Use reserved space on ext2 file systems; make ioctl2 calls controlling ext3 journaling; override disk quota limits; increase resource limits (see setrlimit2); override RLIMIT_NPROC resource limit; raise msg_qbytes limit for a System V message queue above the limit in /proc/sys/kernel/msgmnb (see msgop2 and msgctl2). use F_SETPIPE_SZ to increase the capacity of a pipe above the limit specified by /proc/sys/fs/pipe-max-size. CAP_SYS_TIME Set system clock (settimeofday2, stime2, adjtimex2); set real-time (hardware) clock. CAP_SYS_TTY_CONFIG Use vhangup2. CAP_SYSLOG (since Linux 2.6.37) Perform privileged syslog2 operations. See syslog2 for information on which operations require privilege. Past and Current Implementation A full implementation of capabilities requires that: 1. For all privileged operations, the kernel must check whether the thread has the required capability in its effective set. 2. The kernel must provide system calls allowing a thread's capability sets to be changed and retrieved. 3. The file system must support attaching capabilities to an executable file, so that a process gains those capabilities when the file is executed. Before kernel 2.6.24, only the first two of these requirements are met; since kernel 2.6.24, all three requirements are met. Thread Capability Sets Each thread has three capability sets containing zero or more of the above capabilities: Permitted: This is a limiting superset for the effective capabilities that the thread may assume. It is also a limiting superset for the capabilities that may be added to the inheritable set by a thread that does not have the CAP_SETPCAP capability in its effective set. If a thread drops a capability from its permitted set, it can never reacquire that capability (unless it execve2s either a set-user-ID-root program, or a program whose associated file capabilities grant that capability). Inheritable: This is a set of capabilities preserved across an execve2. It provides a mechanism for a process to assign capabilities to the permitted set of the new program during an execve2. Effective: This is the set of capabilities used by the kernel to perform permission checks for the thread. A child created via fork2 inherits copies of its parent's capability sets. See below for a discussion of the treatment of capabilities during execve2. Using capset2, a thread may manipulate its own capability sets (see below). File Capabilities Since kernel 2.6.24, the kernel supports associating capability sets with an executable file using setcap8. The file capability sets are stored in an extended attribute (see setxattr2) named security.capability. 
Writing to this extended attribute requires the CAP_SETFCAP capability. The file capability sets, in conjunction with the capability sets of the thread, determine the capabilities of a thread after an execve2. The three file capability sets are: Permitted (formerly known as forced): These capabilities are automatically permitted to the thread, regardless of the thread's inheritable capabilities. Inheritable (formerly known as allowed): This set is ANDed with the thread's inheritable set to determine which inheritable capabilities are enabled in the permitted set of the thread after the execve2. Effective: This is not a set, but rather just a single bit. If this bit is set, then during an execve2 all of the new permitted capabilities for the thread are also raised in the effective set. If this bit is not set, then after an execve2, none of the new permitted capabilities is in the new effective set. Enabling the file effective capability bit implies that any file permitted or inheritable capability that causes a thread to acquire the corresponding permitted capability during an execve2 (see the transformation rules described below) will also acquire that capability in its effective set. Therefore, when assigning capabilities to a file (setcap8, cap_set_file3, cap_set_fd3), if we specify the effective flag as being enabled for any capability, then the effective flag must also be specified as enabled for all other capabilities for which the corresponding permitted or inheritable flags is enabled. Transformation of Capabilities During execve() During an execve2, the kernel calculates the new capabilities of the process using the following algorithm: P'(permitted) = (P(inheritable) & F(inheritable)) | (F(permitted) & cap_bset) P'(effective) = F(effective) ? P'(permitted) : 0 P'(inheritable) = P(inheritable) [i.e., unchanged] where:
P denotes the value of a thread capability set before the execve2
P' denotes the value of a capability set after the execve2
F denotes a file capability set
cap_bset is the value of the capability bounding set (described below).
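The transformation is plain set arithmetic, so it can be modelled directly; this Python sketch only illustrates the rules above (capabilities represented as Python sets, function name invented), it is not kernel code:

def caps_after_execve(p_inheritable, f_permitted, f_inheritable,
                      f_effective_bit, cap_bset):
    "Model the capability transformation performed by execve()."
    new_permitted = (p_inheritable & f_inheritable) | (f_permitted & cap_bset)
    new_effective = new_permitted if f_effective_bit else set()
    new_inheritable = p_inheritable            # unchanged across execve()
    return new_permitted, new_inheritable, new_effective

# Example: a file whose permitted set grants CAP_NET_BIND_SERVICE, executed by
# a thread with an empty inheritable set and that capability in its bounding
# set, ends up with CAP_NET_BIND_SERVICE in its permitted and effective sets.
caps_after_execve(set(), {"CAP_NET_BIND_SERVICE"}, set(), True,
                  {"CAP_NET_BIND_SERVICE", "CAP_CHOWN"})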
Capabilities and execution of programs by root In order to provide an all-powerful root using capability sets, during an execve2: 1. If a set-user-ID-root program is being executed, or the real user ID of the process is 0 (root) then the file inheritable and permitted sets are defined to be all ones (i.e., all capabilities enabled). 2. If a set-user-ID-root program is being executed, then the file effective bit is defined to be one (enabled). The upshot of the above rules, combined with the capabilities transformations described above, is that when a process execve2s a set-user-ID-root program, or when a process with an effective UID of 0 execve2s a program, it gains all capabilities in its permitted and effective capability sets, except those masked out by the capability bounding set. This provides semantics that are the same as those provided by traditional UNIX systems. Capability bounding set The capability bounding set is a security mechanism that can be used to limit the capabilities that can be gained during an execve2. The bounding set is used in the following ways: During an execve2, the capability bounding set is ANDed with the file permitted capability set, and the result of this operation is assigned to the thread's permitted capability set. The capability bounding set thus places a limit on the permitted capabilities that may be granted by an executable file. (Since Linux 2.6.25) The capability bounding set acts as a limiting superset for the capabilities that a thread can add to its inheritable set using capset2. This means that if a capability is not in the bounding set, then a thread can't add this capability to its inheritable set, even if it was in its permitted capabilities, and thereby cannot have this capability preserved in its permitted set when it execve2s a file that has the capability in its inheritable set. Note that the bounding set masks the file permitted capabilities, but not the inherited capabilities. If a thread maintains a capability in its inherited set that is not in its bounding set, then it can still gain that capability in its permitted set by executing a file that has the capability in its inherited set. Depending on the kernel version, the capability bounding set is either a system-wide attribute, or a per-process attribute. Capability bounding set prior to Linux 2.6.25 In kernels before 2.6.25, the capability bounding set is a system-wide attribute that affects all threads on the system. The bounding set is accessible via the file /proc/sys/kernel/cap-bound. (Confusingly, this bit mask parameter is expressed as a signed decimal number in /proc/sys/kernel/cap-bound.) Only the init process may set capabilities in the capability bounding set; other than that, the superuser (more precisely: programs with the CAP_SYS_MODULE capability) may only clear capabilities from this set. On a standard system the capability bounding set always masks out the CAP_SETPCAP capability. To remove this restriction (dangerous!), modify the definition of CAP_INIT_EFF_SET in include/linux/capability.h and rebuild the kernel. The system-wide capability bounding set feature was added to Linux starting with kernel version 2.2.11. Capability bounding set from Linux 2.6.25 onward From Linux 2.6.25, the capability bounding set is a per-thread attribute. (There is no longer a system-wide capability bounding set.) The bounding set is inherited at fork2 from the thread's parent, and is preserved across an execve2. 
A thread may remove capabilities from its capability bounding set using the prctl2 PR_CAPBSET_DROP operation, provided it has the CAP_SETPCAP capability. Once a capability has been dropped from the bounding set, it cannot be restored to that set. A thread can determine if a capability is in its bounding set using the prctl2 PR_CAPBSET_READ operation. Removing capabilities from the bounding set is only supported if file capabilities are compiled into the kernel. In kernels before Linux 2.6.33, file capabilities were an optional feature configurable via the CONFIG_SECURITY_FILE_CAPABILITIES option. Since Linux 2.6.33, the configuration option has been removed and file capabilities are always part of the kernel. When file capabilities are compiled into the kernel, the init process (the ancestor of all processes) begins with a full bounding set. If file capabilities are not compiled into the kernel, then init begins with a full bounding set minus CAP_SETPCAP, because this capability has a different meaning when there are no file capabilities. Removing a capability from the bounding set does not remove it from the thread's inherited set. However it does prevent the capability from being added back into the thread's inherited set in the future. Effect of User ID Changes on Capabilities To preserve the traditional semantics for transitions between 0 and nonzero user IDs, the kernel makes the following changes to a thread's capability sets on changes to the thread's real, effective, saved set, and file system user IDs (using setuid2, setresuid2, or similar): 1. If one or more of the real, effective or saved set user IDs was previously 0, and as a result of the UID changes all of these IDs have a nonzero value, then all capabilities are cleared from the permitted and effective capability sets. 2. If the effective user ID is changed from 0 to nonzero, then all capabilities are cleared from the effective set. 3. If the effective user ID is changed from nonzero to 0, then the permitted set is copied to the effective set. 4. If the file system user ID is changed from 0 to nonzero (see setfsuid2) then the following capabilities are cleared from the effective set: CAP_CHOWN, CAP_DAC_OVERRIDE, CAP_DAC_READ_SEARCH, CAP_FOWNER, CAP_FSETID, CAP_LINUX_IMMUTABLE (since Linux 2.2.30), CAP_MAC_OVERRIDE, and CAP_MKNOD (since Linux 2.2.30). If the file system UID is changed from nonzero to 0, then any of these capabilities that are enabled in the permitted set are enabled in the effective set. If a thread that has a 0 value for one or more of its user IDs wants to prevent its permitted capability set being cleared when it resets all of its user IDs to nonzero values, it can do so using the prctl2 PR_SET_KEEPCAPS operation. Programmatically adjusting capability sets A thread can retrieve and change its capability sets using the capget2 and capset2 system calls. However, the use of cap_get_proc3 and cap_set_proc3, both provided in the libcap package, is preferred for this purpose. The following rules govern changes to the thread capability sets: 1. If the caller does not have the CAP_SETPCAP capability, the new inheritable set must be a subset of the combination of the existing inheritable and permitted sets. 2. (Since kernel 2.6.25) The new inheritable set must be a subset of the combination of the existing inheritable set and the capability bounding set. 3. 
The new permitted set must be a subset of the existing permitted set (i.e., it is not possible to acquire permitted capabilities that the thread does not currently have). 4. The new effective set must be a subset of the new permitted set. The "securebits" flags: establishing a capabilities-only environment Starting with kernel 2.6.26, and with a kernel in which file capabilities are enabled, Linux implements a set of per-thread securebits flags that can be used to disable special handling of capabilities for UID 0 (root). These flags are as follows: SECBIT_KEEP_CAPS Setting this flag allows a thread that has one or more 0 UIDs to retain its capabilities when it switches all of its UIDs to a nonzero value. If this flag is not set, then such a UID switch causes the thread to lose all capabilities. This flag is always cleared on an execve2. (This flag provides the same functionality as the older prctl2 PR_SET_KEEPCAPS operation.) SECBIT_NO_SETUID_FIXUP Setting this flag stops the kernel from adjusting capability sets when the threads's effective and file system UIDs are switched between zero and nonzero values. (See the subsection Effect of User ID Changes on Capabilities.) SECBIT_NOROOT If this bit is set, then the kernel does not grant capabilities when a set-user-ID-root program is executed, or when a process with an effective or real UID of 0 calls execve2. (See the subsection Capabilities and execution of programs by root.) Each of the above "base" flags has a companion "locked" flag. Setting any of the "locked" flags is irreversible, and has the effect of preventing further changes to the corresponding "base" flag. The locked flags are: SECBIT_KEEP_CAPS_LOCKED, SECBIT_NO_SETUID_FIXUP_LOCKED, and SECBIT_NOROOT_LOCKED. The securebits flags can be modified and retrieved using the prctl2 PR_SET_SECUREBITS and PR_GET_SECUREBITS operations. The CAP_SETPCAP capability is required to modify the flags. The securebits flags are inherited by child processes. During an execve2, all of the flags are preserved, except SECBIT_KEEP_CAPS which is always cleared. An application can use the following call to lock itself, and all of its descendants, into an environment where the only way of gaining capabilities is by executing a program with associated file capabilities: prctl(PR_SET_SECUREBITS, SECBIT_KEEP_CAPS_LOCKED | SECBIT_NO_SETUID_FIXUP | SECBIT_NO_SETUID_FIXUP_LOCKED | SECBIT_NOROOT | SECBIT_NOROOT_LOCKED);
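One convenient way to observe the effect of such changes is to read the capability masks back from /proc/self/status (see NOTES below). A minimal Python sketch, assuming the conventional bit numbering from <linux/capability.h> (CAP_NET_BIND_SERVICE is bit 10) and an invented helper name:

def read_caps(pid="self"):
    "Return the CapInh/CapPrm/CapEff/CapBnd masks as integers."
    caps = {}
    with open("/proc/%s/status" % pid) as fp:
        for line in fp:
            if line.startswith("Cap"):
                name, value = line.split()
                caps[name.rstrip(":")] = int(value, 16)
    return caps

caps = read_caps()
CAP_NET_BIND_SERVICE = 10    # bit number assumed from <linux/capability.h>
print(bool(caps["CapEff"] & (1 << CAP_NET_BIND_SERVICE)))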
CONFORMING TO No standards govern capabilities, but the Linux capability implementation is based on the withdrawn POSIX.1e draft standard; see http://wt.xpilot.org/publications/posix.1e/. NOTES Since kernel 2.5.27, capabilities are an optional kernel component, and can be enabled/disabled via the CONFIG_SECURITY_CAPABILITIES kernel configuration option. The /proc/PID/task/TID/status file can be used to view the capability sets of a thread. The /proc/PID/status file shows the capability sets of a process's main thread. The libcap package provides a suite of routines for setting and getting capabilities that is more comfortable and less likely to change than the interface provided by capset2 and capget2. This package also provides the setcap8 and getcap8 programs. It can be found at http://www.kernel.org/pub/linux/libs/security/linux-privs. Before kernel 2.6.24, and since kernel 2.6.24 if file capabilities are not enabled, a thread with the CAP_SETPCAP capability can manipulate the capabilities of threads other than itself. However, this is only theoretically possible, since no thread ever has CAP_SETPCAP in either of these cases: In the pre-2.6.25 implementation the system-wide capability bounding set, /proc/sys/kernel/cap-bound, always masks out this capability, and this can not be changed without modifying the kernel source and rebuilding. If file capabilities are disabled in the current implementation, then init starts out with this capability removed from its per-process bounding set, and that bounding set is inherited by all other processes created on the system. SEE ALSO capget2, prctl2, setfsuid2, cap_clear3, cap_copy_ext3, cap_from_text3, cap_get_file3, cap_get_proc3, cap_init3, capgetp3, capsetp3, credentials7, pthreads7, getcap8, setcap8 include/linux/capability.h in the kernel source COLOPHON This page is part of release 3.35 of the Linux man-pages project. A description of the project, and information about reporting bugs, can be found at http://man7.org/linux/man-pages/.
doclifter-2.11/tests/stringwidth.chk0000664000175000017500000000222412152465736015770 0ustar esresr 2011-09-28 PRINTF 3 2011-09-28 GNU Linux Programmer's Manual printf fprintf sprintf snprintf vprintf vfprintf vsprintf vsnprintf formatted output conversion DESCRIPTION This is a stripped manpage intended to test a common evaluation case of the \w conditiol in groff. EXAMPLE To print pi to five decimal places: doclifter-2.11/tests/pax.man0000664000175000017500000034701712152465736014234 0ustar esresr.\" Copyright (c) 2001-2003 The Open Group, All Rights Reserved .TH "PAX" P 2003 "IEEE/The Open Group" "POSIX Programmer's Manual" .\" pax .SH NAME pax \- portable archive interchange .SH SYNOPSIS .nf \fBpax\fP \ \fB[\fP\fB-cdnv\fP\fB]\fP \ \fB[\fP\fB-H|-L\fP\fB]\fP \ \fB[\fP\fB-f\fP \fIarchive\fP\fB]\fP \ \fB[\fP\fB-s\fP \fIreplstr\fP\fP\fB]\fP\fB...\fP \ \fB[\fP\fIpattern\fP\fB...\fP\fB]\fP .br .sp \fBpax -r\fP \ \fB[\fP\fB-cdiknuv\fP\fB]\fP \ \fB[\fP\fB-H|-L\fP\fB]\fP \ \fB[\fP\fB-f\fP \fIarchive\fP\fB]\fP \ \fB[\fP\fB-o\fP \fIoptions\fP\fB]\fP\fB...\fP \ \fB[\fP\fB-p\fP \fIstring\fP\fB]\fP\fB...\fP .br \ \ \ \ \ \ \fB[\fP\fB-s\fP \fIreplstr\fP\fB]\fP\fB...\fP \ \fB[\fP\fIpattern\fP\fB...\fP\fB]\fP\fB .br .sp pax -w\fP \ \fB[\fP\fB-dituvX\fP\fB]\fP \ \fB[\fP\fB-H|-L\fP\fB]\fP \ \fB[\fP\fB-b\fP \fIblksize\fP\fB]\fP \ \fB[\fP\fB-a\fP\fB]\fP \ \fB[\fP\fB-f\fP \fIarchive\fP\fB]\fP \ \fB[\fP\fB-o\fP \fIoptions\fP\fB]\fP\fB...\fP .br \ \ \ \ \ \ \fB[\fP\fB-s\fP \fIreplstr\fP\fB]\fP\fB...\fP \ \fB[\fP\fB-x\fP \fIformat\fP\fB]\fP \ \fB[\fP\fIfile\fP\fB...\fP\fB]\fP\fB .br .sp pax -r -w\fP \ \fB[\fP\fB-diklntuvX\fP\fB]\fP \ \fB[\fP\fB-H|-L\fP\fB]\fP \ \fB[\fP\fB-p\fP \fIstring\fP\fB]\fP\fB...\fP \ \fB[\fP\fB-s\fP \fIreplstr\fP\fB]\fP\fB... .br \ \ \ \ \ \ \fB[\fP\fIfile\fP\fB...\fP\fB]\fP \ \fIdirectory\fP .br .fi .SH DESCRIPTION .LP The \fIpax\fP utility shall read, write, and write lists of the members of archive files and copy directory hierarchies. A variety of archive formats shall be supported; see the \fB-x\fP \fIformat\fP option. .LP The action to be taken depends on the presence of the \fB-r\fP and \fB-w\fP options. The four combinations of \fB-r\fP and \fB-w\fP are referred to as the four modes of operation: \fBlist\fP, \fBread\fP, \fBwrite\fP, and \fBcopy\fP modes, corresponding respectively to the four forms shown in the SYNOPSIS section. .TP 7 \fBlist\fP In \fBlist\fP mode (when neither \fB-r\fP nor \fB-w\fP are specified), \fIpax\fP shall write the names of the members of the archive file read from the standard input, with pathnames matching the specified patterns, to standard output. If a named file is of type directory, the file hierarchy rooted at that file shall be listed as well. .TP 7 \fBread\fP In \fBread\fP mode (when \fB-r\fP is specified, but \fB-w\fP is not), \fIpax\fP shall extract the members of the archive file read from the standard input, with pathnames matching the specified patterns. If an extracted file is of type directory, the file hierarchy rooted at that file shall be extracted as well. The extracted files shall be created performing pathname resolution with the directory in which \fIpax\fP was invoked as the current working directory. .LP If an attempt is made to extract a directory when the directory already exists, this shall not be considered an error. If an attempt is made to extract a FIFO when the FIFO already exists, this shall not be considered an error. 
.LP The ownership, access, and modification times, and file mode of the restored files are discussed under the \fB-p\fP option. .TP 7 \fBwrite\fP In \fBwrite\fP mode (when \fB-w\fP is specified, but \fB-r\fP is not), \fIpax\fP shall write the contents of the \fIfile\fP operands to the standard output in an archive format. If no \fIfile\fP operands are specified, a list of files to copy, one per line, shall be read from the standard input. A file of type directory shall include all of the files in the file hierarchy rooted at the file. .TP 7 \fBcopy\fP In \fBcopy\fP mode (when both \fB-r\fP and \fB-w\fP are specified), \fIpax\fP shall copy the \fIfile\fP operands to the destination directory. .LP If no \fIfile\fP operands are specified, a list of files to copy, one per line, shall be read from the standard input. A file of type directory shall include all of the files in the file hierarchy rooted at the file. .LP The effect of the \fBcopy\fP shall be as if the copied files were written to an archive file and then subsequently extracted, except that there may be hard links between the original and the copied files. If the destination directory is a subdirectory of one of the files to be copied, the results are unspecified. If the destination directory is a file of a type not defined by the System Interfaces volume of IEEE\ Std\ 1003.1-2001, the results are implementation-defined; otherwise, it shall be an error for the file named by the \fIdirectory\fP operand not to exist, not be writable by the user, or not be a file of type directory. .sp .LP In \fBread\fP or \fBcopy\fP modes, if intermediate directories are necessary to extract an archive member, \fIpax\fP shall perform actions equivalent to the \fImkdir\fP() function defined in the System Interfaces volume of IEEE\ Std\ 1003.1-2001, called with the following arguments: .IP " *" 3 The intermediate directory used as the \fIpath\fP argument .LP .IP " *" 3 The value of the bitwise-inclusive OR of S_IRWXU, S_IRWXG, and S_IRWXO as the \fImode\fP argument .LP .LP If any specified \fIpattern\fP or \fIfile\fP operands are not matched by at least one file or archive member, \fIpax\fP shall write a diagnostic message to standard error for each one that did not match and exit with a non-zero exit status. .LP The archive formats described in the EXTENDED DESCRIPTION section shall be automatically detected on input. The default output archive format shall be implementation-defined. .LP A single archive can span multiple files. The \fIpax\fP utility shall determine, in an implementation-defined manner, what file to read or write as the next file. .LP If the selected archive format supports the specification of linked files, it shall be an error if these files cannot be linked when the archive is extracted. For archive formats that do not store file contents with each name that causes a hard link, if the file that contains the data is not extracted during this \fIpax\fP session, either the data shall be restored from the original file, or a diagnostic message shall be displayed with the name of a file that can be used to extract the data. In traversing directories, \fIpax\fP shall detect infinite loops; that is, entering a previously visited directory that is an ancestor of the last file visited. When it detects an infinite loop, \fIpax\fP shall write a diagnostic message to standard error and shall terminate. 
.SH OPTIONS .LP The \fIpax\fP utility shall conform to the Base Definitions volume of IEEE\ Std\ 1003.1-2001, Section 12.2, Utility Syntax Guidelines, except that the order of presentation of the \fB-o\fP, \fB-p\fP, and \fB-s\fP options is significant. .LP The following options shall be supported: .TP 7 \fB-r\fP Read an archive file from standard input. .TP 7 \fB-w\fP Write files to the standard output in the specified archive format. .TP 7 \fB-a\fP Append files to the end of the archive. It is implementation-defined which devices on the system support appending. Additional file formats unspecified by this volume of IEEE\ Std\ 1003.1-2001 may impose restrictions on appending. .TP 7 \fB-b\ \fP \fIblocksize\fP Block the output at a positive decimal integer number of bytes per write to the archive file. Devices and archive formats may impose restrictions on blocking. Blocking shall be automatically determined on input. Conforming applications shall not specify a \fIblocksize\fP value larger than 32256. Default blocking when creating archives depends on the archive format. (See the \fB-x\fP option below.) .TP 7 \fB-c\fP Match all file or archive members except those specified by the \fIpattern\fP or \fIfile\fP operands. .TP 7 \fB-d\fP Cause files of type directory being copied or archived or archive members of type directory being extracted or listed to match only the file or archive member itself and not the file hierarchy rooted at the file. .TP 7 \fB-f\ \fP \fIarchive\fP Specify the pathname of the input or output archive, overriding the default standard input (in \fBlist\fP or \fBread\fP modes) or standard output ( \fBwrite\fP mode). .TP 7 \fB-H\fP If a symbolic link referencing a file of type directory is specified on the command line, \fIpax\fP shall archive the file hierarchy rooted in the file referenced by the link, using the name of the link as the root of the file hierarchy. Otherwise, if a symbolic link referencing a file of any other file type which \fIpax\fP can normally archive is specified on the command line, then \fIpax\fP shall archive the file referenced by the link, using the name of the link. The default behavior shall be to archive the symbolic link itself. .TP 7 \fB-i\fP Interactively rename files or archive members. For each archive member matching a \fIpattern\fP operand or file matching a \fIfile\fP operand, a prompt shall be written to the file \fB/dev/tty\fP. The prompt shall contain the name of the file or archive member, but the format is otherwise unspecified. A line shall then be read from \fB/dev/tty\fP. If this line is blank, the file or archive member shall be skipped. If this line consists of a single period, the file or archive member shall be processed with no modification to its name. Otherwise, its name shall be replaced with the contents of the line. The \fIpax\fP utility shall immediately exit with a non-zero exit status if end-of-file is encountered when reading a response or if \fB/dev/tty\fP cannot be opened for reading and writing. .LP The results of extracting a hard link to a file that has been renamed during extraction are unspecified. .TP 7 \fB-k\fP Prevent the overwriting of existing files. .TP 7 \fB-l\fP (The letter ell.) In \fBcopy\fP mode, hard links shall be made between the source and destination file hierarchies whenever possible. If specified in conjunction with \fB-H\fP or \fB-L\fP, when a symbolic link is encountered, the hard link created in the destination file hierarchy shall be to the file referenced by the symbolic link. 
If specified when neither \fB-H\fP nor \fB-L\fP is specified, when a symbolic link is encountered, the implementation shall create a hard link to the symbolic link in the source file hierarchy or copy the symbolic link to the destination. .TP 7 \fB-L\fP If a symbolic link referencing a file of type directory is specified on the command line or encountered during the traversal of a file hierarchy, \fIpax\fP shall archive the file hierarchy rooted in the file referenced by the link, using the name of the link as the root of the file hierarchy. Otherwise, if a symbolic link referencing a file of any other file type which \fIpax\fP can normally archive is specified on the command line or encountered during the traversal of a file hierarchy, \fIpax\fP shall archive the file referenced by the link, using the name of the link. The default behavior shall be to archive the symbolic link itself. .TP 7 \fB-n\fP Select the first archive member that matches each \fIpattern\fP operand. No more than one archive member shall be matched for each pattern (although members of type directory shall still match the file hierarchy rooted at that file). .TP 7 \fB-o\ \fP \fIoptions\fP Provide information to the implementation to modify the algorithm for extracting or writing files. The value of \fIoptions\fP shall consist of one or more comma-separated keywords of the form: .sp .RS .nf \fIkeyword\fP\fB[[\fP\fB:\fP\fB]\fP\fB=\fP\fIvalue\fP\fB][\fP\fB,\fP\fIkeyword\fP\fB[[\fP\fB:\fP\fB]\fP\fB=\fP\fIvalue\fP\fB]\fP\fB, ...\fP\fB]\fP .fi .RE .LP Some keywords apply only to certain file formats, as indicated with each description. Use of keywords that are inapplicable to the file format being processed produces undefined results. .LP Keywords in the \fIoptions\fP argument shall be a string that would be a valid portable filename as described in the Base Definitions volume of IEEE\ Std\ 1003.1-2001, Section 3.276, Portable Filename Character Set. .TP 7 \fBNote:\fP .RS Keywords are not expected to be filenames, merely to follow the same character composition rules as portable filenames. .RE .sp .LP Keywords can be preceded with white space. The \fIvalue\fP field shall consist of zero or more characters; within \fIvalue\fP, the application shall precede any literal comma with a backslash, which shall be ignored, but preserves the comma as part of \fIvalue\fP. A comma as the final character, or a comma followed solely by white space as the final characters, in \fIoptions\fP shall be ignored. Multiple \fB-o\fP options can be specified; if keywords given to these multiple \fB-o\fP options conflict, the keywords and values appearing later in command line sequence shall take precedence and the earlier shall be silently ignored. The following keyword values of \fIoptions\fP shall be supported for the file formats as indicated: .TP 7 \fBdelete\fP=\fIpattern\fP .RS .sp (Applicable only to the \fB-x\fP \fBpax\fP format.) When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall omit from extended header records that it produces any keywords matching the string pattern. When used in \fBread\fP or \fBlist\fP mode, \fIpax\fP shall ignore any keywords matching the string pattern in the extended header records. In both cases, matching shall be performed using the pattern matching notation described in \fIPatterns Matching a Single Character\fP and \fIPatterns Matching Multiple Characters\fP . For example: .sp .RS .nf \fB-o\fP \fBdelete\fP\fB=\fP\fIsecurity\fP\fB.* \fP .fi .RE .LP would suppress security-related information. 
See pax Extended Header for extended header record keyword usage. .RE .TP 7 \fBexthdr.name\fP=\fIstring\fP .RS .sp (Applicable only to the \fB-x\fP \fBpax\fP format.) This keyword allows user control over the name that is written into the \fBustar\fP header blocks for the extended header produced under the circumstances described in pax Header Block . The name shall be the contents of \fIstring\fP, after the following character substitutions have been made: .TS C center; l lw(40). \fB\fIstring\fP\fP T{ .na \fB\ \fP .ad T} \fBIncludes:\fP T{ .na \fBReplaced By:\fP .ad T} %d T{ .na The directory name of the file, equivalent to the result of the \fIdirname\fP utility on the translated pathname. .ad T} %f T{ .na The filename of the file, equivalent to the result of the \fIbasename\fP utility on the translated pathname. .ad T} %p T{ .na The process ID of the \fIpax\fP process. .ad T} %% T{ .na A \fB'%'\fP character. .ad T} .TE .LP Any other \fB'%'\fP characters in \fIstring\fP produce undefined results. .LP If no \fB-o\fP \fBexthdr.name=\fP \fIstring\fP is specified, \fIpax\fP shall use the following default value: .sp .RS .nf \fB%d/PaxHeaders.%p/%f \fP .fi .RE .RE .TP 7 \fBglobexthdr.name\fP=\fIstring\fP .RS .sp (Applicable only to the \fB-x\fP \fBpax\fP format.) When used in \fBwrite\fP or \fBcopy\fP mode with the appropriate options, \fIpax\fP shall create global extended header records with \fBustar\fP header blocks that will be treated as regular files by previous versions of \fIpax\fP. This keyword allows user control over the name that is written into the \fBustar\fP header blocks for global extended header records. The name shall be the contents of string, after the following character substitutions have been made: .TS C center; l lw(40). \fB\fIstring\fP\fP T{ .na \fB\ \fP .ad T} \fBIncludes:\fP T{ .na \fBReplaced By:\fP .ad T} %n T{ .na An integer that represents the sequence number of the global extended header record in the archive, starting at 1. .ad T} %p T{ .na The process ID of the \fIpax\fP process. .ad T} %% T{ .na A \fB'%'\fP character. .ad T} .TE .LP Any other \fB'%'\fP characters in \fIstring\fP produce undefined results. .LP If no \fB-o\fP \fBglobexthdr.name=\fP \fIstring\fP is specified, \fIpax\fP shall use the following default value: .sp .RS .nf \fB$TMPDIR/GlobalHead.%p.%n \fP .fi .RE .LP where $ \fITMPDIR\fP represents the value of the \fITMPDIR\fP environment variable. If \fITMPDIR\fP is not set, \fIpax\fP shall use \fB/tmp\fP. .RE .TP 7 \fBinvalid\fP=\fIaction\fP .RS .sp (Applicable only to the \fB-x\fP \fBpax\fP format.) This keyword allows user control over the action \fIpax\fP takes upon encountering values in an extended header record that, in \fBread\fP or \fBcopy\fP mode, are invalid in the destination hierarchy or, in \fBlist\fP mode, cannot be written in the codeset and current locale of the implementation. The following are invalid values that shall be recognized by \fIpax\fP: .RS .IP " *" 3 In \fBread\fP or \fBcopy\fP mode, a filename or link name that contains character encodings invalid in the destination hierarchy. (For example, the name may contain embedded NULs.) .LP .IP " *" 3 In \fBread\fP or \fBcopy\fP mode, a filename or link name that is longer than the maximum allowed in the destination hierarchy (for either a pathname component or the entire pathname). .LP .IP " *" 3 In \fBlist\fP mode, any character string value (filename, link name, user name, and so on) that cannot be written in the codeset and current locale of the implementation. 
.LP .RE .LP The following mutually-exclusive values of the \fIaction\fP argument are supported: .TP 7 \fBbypass\fP .RS In \fBread\fP or \fBcopy\fP mode, \fIpax\fP shall bypass the file, causing no change to the destination hierarchy. In \fBlist\fP mode, \fIpax\fP shall write all requested valid values for the file, but its method for writing invalid values is unspecified. .RE .TP 7 \fBrename\fP .RS In \fBread\fP or \fBcopy\fP mode, \fIpax\fP shall act as if the \fB-i\fP option were in effect for each file with invalid filename or link name values, allowing the user to provide a replacement name interactively. In \fBlist\fP mode, \fIpax\fP shall behave identically to the \fBbypass\fP action. .RE .TP 7 \fBUTF-8\fP .RS When used in \fBread\fP, \fBcopy\fP, or \fBlist\fP mode and a filename, link name, owner name, or any other field in an extended header record cannot be translated from the \fBpax\fP UTF-8 codeset format to the codeset and current locale of the implementation, \fIpax\fP shall use the actual UTF-8 encoding for the name. .RE .TP 7 \fBwrite\fP .RS In \fBread\fP or \fBcopy\fP mode, \fIpax\fP shall write the file, translating or truncating the name, regardless of whether this may overwrite an existing file with a valid name. In \fBlist\fP mode, \fIpax\fP shall behave identically to the \fBbypass\fP action. .RE .sp .LP If no \fB-o\fP \fBinvalid=\fP option is specified, \fIpax\fP shall act as if \fB-o\fP \fBinvalid=\fP \fBbypass\fP were specified. Any overwriting of existing files that may be allowed by the \fB-o\fP \fBinvalid=\fP actions shall be subject to permission ( \fB-p\fP) and modification time ( \fB-u\fP) restrictions, and shall be suppressed if the \fB-k\fP option is also specified. .RE .TP 7 \fBlinkdata\fP .RS .sp (Applicable only to the \fB-x\fP \fBpax\fP format.) In \fBwrite\fP mode, \fIpax\fP shall write the contents of a file to the archive even when that file is merely a hard link to a file whose contents have already been written to the archive. .RE .TP 7 \fBlistopt\fP=\fIformat\fP .RS .sp This keyword specifies the output format of the table of contents produced when the \fB-v\fP option is specified in \fBlist\fP mode. See List Mode Format Specifications . To avoid ambiguity, the \fBlistopt=\fP \fIformat\fP shall be the only or final \fBkeyword=\fP \fIvalue\fP pair in a \fB-o\fP option-argument; all characters in the remainder of the option-argument shall be considered part of the format string. When multiple \fB-o\fP \fBlistopt=\fP \fIformat\fP options are specified, the format strings shall be considered a single, concatenated string, evaluated in command line order. .RE .TP 7 \fBtimes\fP .RS .sp (Applicable only to the \fB-x\fP \fIpax\fP format.) When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall include \fBatime\fP, \fBctime\fP, and \fBmtime\fP extended header records for each file. See pax Extended Header File Times . .RE .sp .LP In addition to these keywords, if the \fB-x\fP \fIpax\fP format is specified, any of the keywords and values defined in pax Extended Header , including implementation extensions, can be used in \fB-o\fP option-arguments, in either of two modes: .TP 7 \fBkeyword\fP=\fIvalue\fP .RS .sp When used in \fBwrite\fP or \fBcopy\fP mode, these keyword/value pairs shall be included at the beginning of the archive as \fBtypeflag\fP \fBg\fP global extended header records. 
When used in \fBread\fP or \fBlist\fP mode, these keyword/value pairs shall act as if they had been at the beginning of the archive as \fBtypeflag\fP \fBg\fP global extended header records. .RE .TP 7 \fBkeyword\fP:=\fIvalue\fP .RS .sp When used in \fBwrite\fP or \fBcopy\fP mode, these keyword/value pairs shall be included as records at the beginning of a \fBtypeflag\fP \fBx\fP extended header for each file. (This shall be equivalent to the equal-sign form except that it creates no \fBtypeflag\fP \fBg\fP global extended header records.) When used in \fBread\fP or \fBlist\fP mode, these keyword/value pairs shall act as if they were included as records at the end of each extended header; thus, they shall override any global or file-specific extended header record keywords of the same names. For example, in the command: .sp .RS .nf \fBpax -r -o " gname:=mygroup, " s. .LP Any non-null character can be used as a delimiter ( \fB'/'\fP shown here). Multiple \fB-s\fP expressions can be specified; the expressions shall be applied in the order specified, terminating with the first successful substitution. The optional trailing \fB'g'\fP is as defined in the \fIed\fP utility. The optional trailing \fB'p'\fP shall cause successful substitutions to be written to standard error. File or archive member names that substitute to the empty string shall be ignored when reading and writing archives. .TP 7 \fB-t\fP When reading files from the file system, and if the user has the permissions required by \fIutime\fP() to do so, set the access time of each file read to the access time that it had before being read by \fIpax\fP. .TP 7 \fB-u\fP Ignore files that are older (having a less recent file modification time) than a pre-existing file or archive member with the same name. In \fBread\fP mode, an archive member with the same name as a file in the file system shall be extracted if the archive member is newer than the file. In \fBwrite\fP mode, an archive file member with the same name as a file in the file system shall be superseded if the file is newer than the archive member. If \fB-a\fP is also specified, this is accomplished by appending to the archive; otherwise, it is unspecified whether this is accomplished by actual replacement in the archive or by appending to the archive. In \fBcopy\fP mode, the file in the destination hierarchy shall be replaced by the file in the source hierarchy or by a link to the file in the source hierarchy if the file in the source hierarchy is newer. .TP 7 \fB-v\fP In \fBlist\fP mode, produce a verbose table of contents (see the STDOUT section). Otherwise, write archive member pathnames to standard error (see the STDERR section). .TP 7 \fB-x\ \fP \fIformat\fP Specify the output archive format. The \fIpax\fP utility shall support the following formats: .TP 7 \fBcpio\fP .RS The \fBcpio\fP interchange format; see the EXTENDED DESCRIPTION section. The default \fIblocksize\fP for this format for character special archive files shall be 5120. Implementations shall support all \fIblocksize\fP values less than or equal to 32256 that are multiples of 512. .RE .TP 7 \fBpax\fP .RS The \fBpax\fP interchange format; see the EXTENDED DESCRIPTION section. The default \fIblocksize\fP for this format for character special archive files shall be 5120. Implementations shall support all \fIblocksize\fP values less than or equal to 32256 that are multiples of 512. .RE .TP 7 \fBustar\fP .RS The \fBtar\fP interchange format; see the EXTENDED DESCRIPTION section. 
The default \fIblocksize\fP for this format for character special archive files shall be 10240. Implementations shall support all \fIblocksize\fP values less than or equal to 32256 that are multiples of 512. .RE .sp .LP Implementation-defined formats shall specify a default block size as well as any other block sizes supported for character special archive files. .LP Any attempt to append to an archive file in a format different from the existing archive format shall cause \fIpax\fP to exit immediately with a non-zero exit status. .LP In \fBcopy\fP mode, if no \fB-x\fP format is specified, \fIpax\fP shall behave as if \fB-x\fP \fIpax\fP were specified. .TP 7 \fB-X\fP When traversing the file hierarchy specified by a pathname, \fIpax\fP shall not descend into directories that have a different device ID ( \fIst_dev\fP; see the System Interfaces volume of IEEE\ Std\ 1003.1-2001, \fIstat\fP()). .sp .LP The options that operate on the names of files or archive members ( \fB-c\fP, \fB-i\fP, \fB-n\fP, \fB-s\fP, \fB-u\fP, and \fB-v\fP) shall interact as follows. In \fBread\fP mode, the archive members shall be selected based on the user-specified \fIpattern\fP operands as modified by the \fB-c\fP, \fB-n\fP, and \fB-u\fP options. Then, any \fB-s\fP and \fB-i\fP options shall modify, in that order, the names of the selected files. The \fB-v\fP option shall write names resulting from these modifications. .LP In \fBwrite\fP mode, the files shall be selected based on the user-specified pathnames as modified by the \fB-n\fP and \fB-u\fP options. Then, any \fB-s\fP and \fB-i\fP options shall modify, in that order, the names of these selected files. The \fB-v\fP option shall write names resulting from these modifications. .LP If both the \fB-u\fP and \fB-n\fP options are specified, \fIpax\fP shall not consider a file selected unless it is newer than the file to which it is compared. .SS List Mode Format Specifications .LP In \fBlist\fP mode with the \fB-o\fP \fBlistopt=\fP \fIformat\fP option, the \fIformat\fP argument shall be applied for each selected file. The \fIpax\fP utility shall append a to the \fBlistopt\fP output for each selected file. The \fIformat\fP argument shall be used as the \fIformat\fP string described in the Base Definitions volume of IEEE\ Std\ 1003.1-2001, Chapter 5, File Format Notation, with the exceptions 1. through 5. defined in the EXTENDED DESCRIPTION section of \fIprintf\fP, plus the following exceptions: .TP 7 6. The sequence ( \fIkeyword\fP) can occur before a format conversion specifier. The conversion argument is defined by the value of \fIkeyword\fP. The implementation shall support the following keywords: .RS .IP " *" 3 Any of the Field Name entries in ustar Header Block and Octet-Oriented cpio Archive Entry . The implementation may support the \fIcpio\fP keywords without the leading \fBc_\fP in addition to the form required by Values for cpio c_mode Field . .LP .IP " *" 3 Any keyword defined for the extended header in pax Extended Header \&. .LP .IP " *" 3 Any keyword provided as an implementation-defined extension within the extended header defined in pax Extended Header . .LP .RE .LP For example, the sequence \fB"%(charset)s"\fP is the string value of the name of the character set in the extended header. .LP The result of the keyword conversion argument shall be the value from the applicable header field or extended header, without any trailing NULs. 
.LP All keyword values used as conversion arguments shall be translated from the UTF-8 encoding to the character set appropriate for the local file system, user database, and so on, as applicable. .TP 7 7. An additional conversion specifier character, \fBT\fP , shall be used to specify time formats. The \fBT\fP conversion specifier character can be preceded by the sequence ( \fIkeyword=\fP \fIsubformat\fP), where \fIsubformat\fP is a date format as defined by \fIdate\fP operands. The default \fIkeyword\fP shall be \fBmtime\fP and the default subformat shall be: .sp .RS .nf \fB%b %e %H:%M %Y \fP .fi .RE .TP 7 8. An additional conversion specifier character, \fBM\fP , shall be used to specify the file mode string as defined in \fIls\fP Standard Output. If ( \fIkeyword\fP) is omitted, the \fBmode\fP keyword shall be used. For example, \fB%.1M\fP writes the single character corresponding to the <\fIentry\ type\fP> field of the \fIls\fP \fB-l\fP command. .TP 7 9. An additional conversion specifier character, \fBD\fP , shall be used to specify the device for block or special files, if applicable, in an implementation-defined format. If not applicable, and ( \fIkeyword\fP) is specified, then this conversion shall be equivalent to \fB%(\fP\fIkeyword\fP\fB)u\fP. If not applicable, and ( \fIkeyword\fP) is omitted, then this conversion shall be equivalent to . .TP 7 10. An additional conversion specifier character, \fBF\fP , shall be used to specify a pathname. The \fBF\fP conversion character can be preceded by a sequence of comma-separated keywords: .sp .RS .nf \fB(\fP\fIkeyword\fP\fB[\fP\fB,\fP\fIkeyword\fP\fB]\fP \fB... ) \fP .fi .RE .LP The values for all the keywords that are non-null shall be concatenated together, each separated by a \fB'/'\fP . The default shall be ( \fBpath\fP) if the keyword \fBpath\fP is defined; otherwise, the default shall be ( \fBprefix\fP, \fBname\fP). .TP 7 11. An additional conversion specifier character, \fBL\fP , shall be used to specify a symbolic line expansion. If the current file is a symbolic link, then \fB%L\fP shall expand to: .sp .RS .nf \fB"%s -> %s", <\fP\fIvalue of keyword\fP\fB>, <\fP\fIcontents of link\fP\fB> \fP .fi .RE .LP Otherwise, the \fB%L\fP conversion specification shall be the equivalent of \fB%F\fP . .sp .SH OPERANDS .LP The following operands shall be supported: .TP 7 \fIdirectory\fP The destination directory pathname for \fBcopy\fP mode. .TP 7 \fIfile\fP A pathname of a file to be copied or archived. .TP 7 \fIpattern\fP A pattern matching one or more pathnames of archive members. A pattern must be given in the name-generating notation of the pattern matching notation in \fIPattern Matching Notation\fP , including the filename expansion rules in \fIPatterns Used for Filename Expansion\fP . The default, if no \fIpattern\fP is specified, is to select all members in the archive. .sp .SH STDIN .LP In \fBwrite\fP mode, the standard input shall be used only if no \fIfile\fP operands are specified. It shall be a text file containing a list of pathnames, one per line, without leading or trailing s. .LP In \fBlist\fP and \fBread\fP modes, if \fB-f\fP is not specified, the standard input shall be an archive file. .LP Otherwise, the standard input shall not be used. .SH INPUT FILES .LP The input file named by the \fIarchive\fP option-argument, or standard input when the archive is read from there, shall be a file formatted according to one of the specifications in the EXTENDED DESCRIPTION section or some other implementation-defined format. 
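.LP
As an informative illustration only (not part of this description), the following Python sketch shows how such an input archive might be recognized from the magic values defined in the EXTENDED DESCRIPTION section: the \fImagic\fP field at octet offset 257 of a \fBustar\fP or \fBpax\fP header block, and the \fIc_magic\fP value "070707" at the start of an octet-oriented \fBcpio\fP entry. The file name and the sketch are illustrative assumptions and do not describe how a conforming \fIpax\fP implementation performs its automatic detection.
.sp
.RS
.nf
# Informative sketch; "archive.in" is a hypothetical input file name.
def sniff_format(path):
    with open(path, "rb") as fp:
        block = fp.read(512)
    if block[257:262] == b"ustar":
        return "ustar or pax"            # magic field at octet offset 257
    if block[0:6] == b"070707":
        return "octet-oriented cpio"     # c_magic of the first entry
    return "unrecognized or implementation-defined"

print(sniff_format("archive.in"))
.fi
.RE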
.LP The file \fB/dev/tty\fP shall be used to write prompts and read responses. .SH ENVIRONMENT VARIABLES .LP The following environment variables shall affect the execution of \fIpax\fP: .TP 7 \fILANG\fP Provide a default value for the internationalization variables that are unset or null. (See the Base Definitions volume of IEEE\ Std\ 1003.1-2001, Section 8.2, Internationalization Variables for the precedence of internationalization variables used to determine the values of locale categories.) .TP 7 \fILC_ALL\fP If set to a non-empty string value, override the values of all the other internationalization variables. .TP 7 \fILC_COLLATE\fP .sp Determine the locale for the behavior of ranges, equivalence classes, and multi-character collating elements used in the pattern matching expressions for the \fIpattern\fP operand, the basic regular expression for the \fB-s\fP option, and the extended regular expression defined for the \fByesexpr\fP locale keyword in the \fILC_MESSAGES\fP category. .TP 7 \fILC_CTYPE\fP Determine the locale for the interpretation of sequences of bytes of text data as characters (for example, single-byte as opposed to multi-byte characters in arguments and input files), the behavior of character classes used in the extended regular expression defined for the \fByesexpr\fP locale keyword in the \fILC_MESSAGES\fP category, and pattern matching. .TP 7 \fILC_MESSAGES\fP Determine the locale for the processing of affirmative responses that should be used to affect the format and contents of diagnostic messages written to standard error. .TP 7 \fILC_TIME\fP Determine the format and contents of date and time strings when the \fB-v\fP option is specified. .TP 7 \fINLSPATH\fP Determine the location of message catalogs for the processing of \fILC_MESSAGES \&.\fP .TP 7 \fITMPDIR\fP Determine the pathname that provides part of the default global extended header record file, as described for the \fB-o\fP \fBglobexthdr=\fP keyword in the OPTIONS section. .TP 7 \fITZ\fP Determine the timezone used to calculate date and time strings when the \fB-v\fP option is specified. If \fITZ\fP is unset or null, an unspecified default timezone shall be used. .sp .SH ASYNCHRONOUS EVENTS .LP Default. .SH STDOUT .LP In \fBwrite\fP mode, if \fB-f\fP is not specified, the standard output shall be the archive formatted according to one of the specifications in the EXTENDED DESCRIPTION section, or some other implementation-defined format (see \fB-x\fP \fIformat\fP). .LP In \fBlist\fP mode, when the \fB-o\fP \fBlistopt\fP= \fIformat\fP has been specified, the selected archive members shall be written to standard output using the format described under List Mode Format Specifications . In \fBlist\fP mode without the \fB-o\fP \fBlistopt\fP= \fIformat\fP option, the table of contents of the selected archive members shall be written to standard output using the following format: .sp .RS .nf \fB"%s\\n", <\fP\fIpathname\fP\fB> \fP .fi .RE .LP If the \fB-v\fP option is specified in \fBlist\fP mode, the table of contents of the selected archive members shall be written to standard output using the following formats. 
.LP For pathnames representing hard links to previous members of the archive: .sp .RS .nf \fB"%s == %s\\n", <\fP\fIls\fP \fB-l\fP \fIlisting\fP\fB>, <\fP\fIlinkname\fP\fB> \fP .fi .RE .LP For all other pathnames: .sp .RS .nf \fB"%s\\n", <\fP\fIls\fP \fB-l\fP \fIlisting\fP\fB> \fP .fi .RE .LP where <\fIls\ \fP -l\ \fIlisting\fP> shall be the format specified by the \fIls\fP utility with the \fB-l\fP option. When writing pathnames in this format, it is unspecified what is written for fields for which the underlying archive format does not have the correct information, although the correct number of -separated fields shall be written. .LP In \fBlist\fP mode, standard output shall not be buffered more than a line at a time. .SH STDERR .LP If \fB-v\fP is specified in \fBread\fP, \fBwrite\fP, or \fBcopy\fP modes, \fIpax\fP shall write the pathnames it processes to the standard error output using the following format: .sp .RS .nf \fB"%s\\n", <\fP\fIpathname\fP\fB> \fP .fi .RE .LP These pathnames shall be written as soon as processing is begun on the file or archive member, and shall be flushed to standard error. The trailing , which shall not be buffered, is written when the file has been read or written. .LP If the \fB-s\fP option is specified, and the replacement string has a trailing \fB'p'\fP , substitutions shall be written to standard error in the following format: .sp .RS .nf \fB"%s >> %s\\n", <\fP\fIoriginal pathname\fP\fB>, <\fP\fInew pathname\fP\fB> \fP .fi .RE .LP In all operating modes of \fIpax\fP, optional messages of unspecified format concerning the input archive format and volume number, the number of files, blocks, volumes, and media parts as well as other diagnostic messages may be written to standard error. .LP In all formats, for both standard output and standard error, it is unspecified how non-printable characters in pathnames or link names are written. .LP When \fIpax\fP is in \fBread\fP mode or \fBlist\fP mode, using the \fB-x\fP \fBpax\fP archive format, and a filename, link name, owner name, or any other field in an extended header record cannot be translated from the \fBpax\fP UTF-8 codeset format to the codeset and current locale of the implementation, \fIpax\fP shall write a diagnostic message to standard error, shall process the file as described for the \fB-o\fP \fBinvalid=\fP option, and then shall process the next file in the archive. .SH OUTPUT FILES .LP In \fBread\fP mode, the extracted output files shall be of the archived file type. In \fBcopy\fP mode, the copied output files shall be the type of the file being copied. In either mode, existing files in the destination hierarchy shall be overwritten only when all permission ( \fB-p\fP), modification time ( \fB-u\fP), and invalid-value ( \fB-o\fP \fBinvalid\fP=) tests allow it. .LP In \fBwrite\fP mode, the output file named by the \fB-f\fP option-argument shall be a file formatted according to one of the specifications in the EXTENDED DESCRIPTION section, or some other implementation-defined format. .SH EXTENDED DESCRIPTION .SS pax Interchange Format .LP A \fIpax\fP archive tape or file produced in the \fB-x\fP \fBpax\fP format shall contain a series of blocks. The physical layout of the archive shall be identical to the \fBustar\fP format described in ustar Interchange Format . Each file archived shall be represented by the following sequence: .IP " *" 3 An optional header block with extended header records. 
This header block is of the form described in pax Header Block , with a \fItypeflag\fP value of \fBx\fP or \fBg\fP. The extended header records, described in pax Extended Header , shall be included as the data for this header block. .LP .IP " *" 3 A header block that describes the file. Any fields in the preceding optional extended header shall override the associated fields in this header block for this file. .LP .IP " *" 3 Zero or more blocks that contain the contents of the file. .LP .LP At the end of the archive file there shall be two 512-byte blocks filled with binary zeros, interpreted as an end-of-archive indicator. .LP A schematic of an example archive with global extended header records and two actual files is shown in pax Format Archive Example . In the example, the second file in the archive has no extended header preceding it, presumably because it has no need for extended attributes. .TP 7 .sp .RS \fBFigure: pax Format Archive Example\fP .RE .SS pax Header Block .LP The \fBpax\fP header block shall be identical to the \fBustar\fP header block described in ustar Interchange Format , except that two additional \fItypeflag\fP values are defined: .TP 7 \fBx\fP Represents extended header records for the following file in the archive (which shall have its own \fBustar\fP header block). The format of these extended header records shall be as described in pax Extended Header . .TP 7 \fBg\fP Represents global extended header records for the following files in the archive. The format of these extended header records shall be as described in pax Extended Header . Each value shall affect all subsequent files that do not override that value in their own extended header record and until another global extended header record is reached that provides another value for the same field. The \fItypeflag\fP \fBg\fP global headers should not be used with interchange media that could suffer partial data loss in transporting the archive. .sp .LP For both of these types, the \fIsize\fP field shall be the size of the extended header records in octets. The other fields in the header block are not meaningful to this version of the \fIpax\fP utility. However, if this archive is read by a \fIpax\fP utility conforming to the ISO\ POSIX-2:1993 standard, the header block fields are used to create a regular file that contains the extended header records as data. Therefore, header block field values should be selected to provide reasonable file access to this regular file. .LP A further difference from the \fBustar\fP header block is that data blocks for files of \fItypeflag\fP 1 (the digit one) (hard link) may be included, which means that the size field may be greater than zero. Archives created by \fIpax\fP \fB-o\fP \fBlinkdata\fP shall include these data blocks with the hard links. .SS pax Extended Header .LP A \fBpax\fP extended header contains values that are inappropriate for the \fBustar\fP header block because of limitations in that format: fields requiring a character encoding other than that described in the ISO/IEC\ 646:1991 standard, fields representing file attributes not described in the \fBustar\fP header, and fields whose format or length do not fit the requirements of the \fBustar\fP header. The values in an extended header add attributes to the following file (or files; see the description of the \fItypeflag\fP \fBg\fP header block) or override values in the following header block(s), as indicated in the following list of keywords. 
.LP An extended header shall consist of one or more records, each constructed as follows: .sp .RS .nf \fB"%d %s=%s\\n", <\fP\fIlength\fP\fB>, <\fP\fIkeyword\fP\fB>, <\fP\fIvalue\fP\fB> \fP .fi .RE .LP The extended header records shall be encoded according to the ISO/IEC\ 10646-1:2000 standard (UTF-8). The <\fIlength\fP> field, , equals sign, and shown shall be limited to the portable character set, as encoded in UTF-8. The <\fIkeyword\fP> and <\fIvalue\fP> fields can be any UTF-8 characters. The <\fIlength\fP> field shall be the decimal length of the extended header record in octets, including the trailing . .LP The <\fIkeyword\fP> field shall be one of the entries from the following list or a keyword provided as an implementation extension. Keywords consisting entirely of lowercase letters, digits, and periods are reserved for future standardization. A keyword shall not include an equals sign. (In the following list, the notations "file(s)" or "block(s)" is used to acknowledge that a keyword affects the following single file after a \fItypeflag\fP \fBx\fP extended header, but possibly multiple files after \fItypeflag\fP \fBg\fP. Any requirements in the list for \fIpax\fP to include a record when in \fBwrite\fP or \fBcopy\fP mode shall apply only when such a record has not already been provided through the use of the \fB-o\fP option. When used in \fBcopy\fP mode, \fIpax\fP shall behave as if an archive had been created with applicable extended header records and then extracted.) .TP 7 \fBatime\fP The file access time for the following file(s), equivalent to the value of the \fIst_atime\fP member of the \fBstat\fP structure for a file, as described by the \fIstat\fP() function. The access time shall be restored if the process has the appropriate privilege required to do so. The format of the <\fIvalue\fP> shall be as described in pax Extended Header File Times . .TP 7 \fBcharset\fP The name of the character set used to encode the data in the following file(s). The entries in the following table are defined to refer to known standards; additional names may be agreed on between the originator and recipient. .TS C center; l2 l. \fB\fP \fBFormal Standard\fP ISO-IR 646 1990 ISO/IEC 646:1990 ISO-IR 8859 1 1998 ISO/IEC 8859-1:1998 ISO-IR 8859 2 1999 ISO/IEC 8859-2:1999 ISO-IR 8859 3 1999 ISO/IEC 8859-3:1999 ISO-IR 8859 4 1998 ISO/IEC 8859-4:1998 ISO-IR 8859 5 1999 ISO/IEC 8859-5:1999 ISO-IR 8859 6 1999 ISO/IEC 8859-6:1999 ISO-IR 8859 7 1987 ISO/IEC 8859-7:1987 ISO-IR 8859 8 1999 ISO/IEC 8859-8:1999 ISO-IR 8859 9 1999 ISO/IEC 8859-9:1999 ISO-IR 8859 10 1998 ISO/IEC 8859-10:1998 ISO-IR 8859 13 1998 ISO/IEC 8859-13:1998 ISO-IR 8859 14 1998 ISO/IEC 8859-14:1998 ISO-IR 8859 15 1999 ISO/IEC 8859-15:1999 ISO-IR 10646 2000 ISO/IEC 10646:2000 ISO-IR 10646 2000 UTF-8 ISO/IEC 10646, UTF-8 encoding BINARY None. .TE .LP The encoding is included in an extended header for information only; when \fIpax\fP is used as described in IEEE\ Std\ 1003.1-2001, it shall not translate the file data into any other encoding. The \fBBINARY\fP entry indicates unencoded binary data. .LP When used in \fBwrite\fP or \fBcopy\fP mode, it is implementation-defined whether \fIpax\fP includes a \fBcharset\fP extended header record for a file. .TP 7 \fBcomment\fP A series of characters used as a comment. All characters in the <\fIvalue\fP> field shall be ignored by \fIpax\fP. 
.TP 7 \fBctime\fP The file creation time for the following file(s), equivalent to the value of the \fIst_ctime\fP member of the \fBstat\fP structure for a file, as described by the \fIstat\fP() function. The creation time shall be restored if the process has the appropriate privilege required to do so. The format of the <\fIvalue\fP> shall be as described in pax Extended Header File Times . .TP 7 \fBgid\fP The group ID of the group that owns the file, expressed as a decimal number using digits from the ISO/IEC\ 646:1991 standard. This record shall override the \fIgid\fP field in the following header block(s). When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall include a \fIgid\fP extended header record for each file whose group ID is greater than 2097151 (octal 7777777). .TP 7 \fBgname\fP The group of the file(s), formatted as a group name in the group database. This record shall override the \fIgid\fP and \fIgname\fP fields in the following header block(s), and any \fIgid\fP extended header record. When used in \fBread\fP, \fBcopy\fP, or \fBlist\fP mode, \fIpax\fP shall translate the name from the UTF-8 encoding in the header record to the character set appropriate for the group database on the receiving system. If any of the UTF-8 characters cannot be translated, and if the \fB-o\fP \fBinvalid=\fP UTF-8 option is not specified, the results are implementation-defined. When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall include a \fBgname\fP extended header record for each file whose group name cannot be represented entirely with the letters and digits of the portable character set. .TP 7 \fBlinkpath\fP The pathname of a link being created to another file, of any type, previously archived. This record shall override the \fIlinkname\fP field in the following \fBustar\fP header block(s). The following \fBustar\fP header block shall determine the type of link created. If \fItypeflag\fP of the following header block is 1, it shall be a hard link. If \fItypeflag\fP is 2, it shall be a symbolic link and the \fBlinkpath\fP value shall be the contents of the symbolic link. The \fIpax\fP utility shall translate the name of the link (contents of the symbolic link) from the UTF-8 encoding to the character set appropriate for the local file system. When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall include a \fBlinkpath\fP extended header record for each link whose pathname cannot be represented entirely with the members of the portable character set other than NUL. .TP 7 \fBmtime\fP The file modification time of the following file(s), equivalent to the value of the \fIst_mtime\fP member of the \fBstat\fP structure for a file, as described in the \fIstat\fP() function. This record shall override the \fImtime\fP field in the following header block(s). The modification time shall be restored if the process has the appropriate privilege required to do so. The format of the <\fIvalue\fP> shall be as described in pax Extended Header File Times . .TP 7 \fBpath\fP The pathname of the following file(s). This record shall override the \fIname\fP and \fIprefix\fP fields in the following header block(s). The \fIpax\fP utility shall translate the pathname of the file from the UTF-8 encoding to the character set appropriate for the local file system. .LP When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall include a \fIpath\fP extended header record for each file whose pathname cannot be represented entirely with the members of the portable character set other than NUL. 
.TP 7 \fBrealtime.\fP\fIany\fP The keywords prefixed by "realtime." are reserved for future standardization. .TP 7 \fBsecurity.\fP\fIany\fP The keywords prefixed by "security." are reserved for future standardization. .TP 7 \fBsize\fP The size of the file in octets, expressed as a decimal number using digits from the ISO/IEC\ 646:1991 standard. This record shall override the \fIsize\fP field in the following header block(s). When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall include a \fIsize\fP extended header record for each file with a size value greater than 8589934591 (octal 77777777777). .TP 7 \fBuid\fP The user ID of the file owner, expressed as a decimal number using digits from the ISO/IEC\ 646:1991 standard. This record shall override the \fIuid\fP field in the following header block(s). When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall include a \fIuid\fP extended header record for each file whose owner ID is greater than 2097151 (octal 7777777). .TP 7 \fBuname\fP The owner of the following file(s), formatted as a user name in the user database. This record shall override the \fIuid\fP and \fIuname\fP fields in the following header block(s), and any \fIuid\fP extended header record. When used in \fBread\fP, \fBcopy\fP, or \fBlist\fP mode, \fIpax\fP shall translate the name from the UTF-8 encoding in the header record to the character set appropriate for the user database on the receiving system. If any of the UTF-8 characters cannot be translated, and if the \fB-o\fP \fBinvalid=\fP UTF-8 option is not specified, the results are implementation-defined. When used in \fBwrite\fP or \fBcopy\fP mode, \fIpax\fP shall include a \fBuname\fP extended header record for each file whose user name cannot be represented entirely with the letters and digits of the portable character set. .sp .LP If the <\fIvalue\fP> field is zero length, it shall delete any header block field, previously entered extended header value, or global extended header value of the same name. .LP If a keyword in an extended header record (or in a \fB-o\fP option-argument) overrides or deletes a corresponding field in the \fBustar\fP header block, \fIpax\fP shall ignore the contents of that header block field. .LP Unlike the \fBustar\fP header block fields, NULs shall not delimit <\fIvalue\fP>s; all characters within the <\fIvalue\fP> field shall be considered data for the field. None of the length limitations of the \fBustar\fP header block fields in ustar Header Block shall apply to the extended header records. .SS pax Extended Header Keyword Precedence .LP This section describes the precedence in which the various header records and fields and command line options are selected to apply to a file in the archive. When \fIpax\fP is used in \fBread\fP or \fBlist\fP modes, it shall determine a file attribute in the following sequence: .IP " 1." 4 If \fB-o\fP \fBdelete=\fP \fIkeyword-prefix\fP is used, the affected attributes shall be determined from step 7., if applicable, or ignored otherwise. .LP .IP " 2." 4 If \fB-o\fP \fIkeyword\fP:= is used, the affected attributes shall be ignored. .LP .IP " 3." 4 If \fB-o\fP \fIkeyword\fP \fB:=\fP \fIvalue\fP is used, the affected attribute shall be assigned the value. .LP .IP " 4." 4 If there is a \fItypeflag\fP \fBx\fP extended header record, the affected attribute shall be assigned the <\fIvalue\fP>. When extended header records conflict, the last one given in the header shall take precedence. .LP .IP " 5." 
4 If \fB-o\fP \fIkeyword\fP \fB=\fP \fIvalue\fP is used, the affected attribute shall be assigned the value. .LP .IP " 6." 4 If there is a \fItypeflag\fP \fBg\fP global extended header record, the affected attribute shall be assigned the <\fIvalue\fP>. When global extended header records conflict, the last one given in the global header shall take precedence. .LP .IP " 7." 4 Otherwise, the attribute shall be determined from the \fBustar\fP header block. .LP .SS pax Extended Header File Times .LP The \fIpax\fP utility shall write an \fBmtime\fP record for each file in \fBwrite\fP or \fBcopy\fP modes if the file's modification time cannot be represented exactly in the \fBustar\fP header logical record described in ustar Interchange Format . This can occur if the time is out of \fBustar\fP range, or if the file system of the underlying implementation supports non-integer time granularities and the time is not an integer. All of these time records shall be formatted as a decimal representation of the time in seconds since the Epoch. If a period ( \fB'.'\fP ) decimal point character is present, the digits to the right of the point shall represent the units of a subsecond timing granularity, where the first digit is tenths of a second and each subsequent digit is a tenth of the previous digit. In \fBread\fP or \fBcopy\fP mode, the \fIpax\fP utility shall truncate the time of a file to the greatest value that is not greater than the input header file time. In \fBwrite\fP or \fBcopy\fP mode, the \fIpax\fP utility shall output a time exactly if it can be represented exactly as a decimal number, and otherwise shall generate only enough digits so that the same time shall be recovered if the file is extracted on a system whose underlying implementation supports the same time granularity. .SS ustar Interchange Format .LP A \fBustar\fP archive tape or file shall contain a series of logical records. Each logical record shall be a fixed-size logical record of 512 octets (see below). Although this format may be thought of as being stored on 9-track industry-standard 12.7 mm (0.5 in) magnetic tape, other types of transportable media are not excluded. Each file archived shall be represented by a header logical record that describes the file, followed by zero or more logical records that give the contents of the file. At the end of the archive file there shall be two 512-octet logical records filled with binary zeros, interpreted as an end-of-archive indicator. .LP The logical records may be grouped for physical I/O operations, as described under the \fB-b\fP \fIblocksize\fP and \fB-x\fP \fBustar\fP options. Each group of logical records may be written with a single operation equivalent to the \fIwrite\fP() function. On magnetic tape, the result of this write shall be a single tape physical block. The last physical block shall always be the full size, so logical records after the two zero logical records may contain undefined data. .LP The header logical record shall be structured as shown in the following table. All lengths and offsets are in decimal. .br .sp .RS \fBTable: ustar Header Block\fP .TS C center; l l l. 
\fBField Name\fP \fBOctet Offset\fP \fBLength (in Octets)\fP \fIname\fP 0 100 \fImode\fP 100 8 \fIuid\fP 108 8 \fIgid\fP 116 8 \fIsize\fP 124 12 \fImtime\fP 136 12 \fIchksum\fP 148 8 \fItypeflag\fP 156 1 \fIlinkname\fP 157 100 \fImagic\fP 257 6 \fIversion\fP 263 2 \fIuname\fP 265 32 \fIgname\fP 297 32 \fIdevmajor\fP 329 8 \fIdevminor\fP 337 8 \fIprefix\fP 345 155 .TE .RE .LP All characters in the header logical record shall be represented in the coded character set of the ISO/IEC\ 646:1991 standard. For maximum portability between implementations, names should be selected from characters represented by the portable filename character set as octets with the most significant bit zero. If an implementation supports the use of characters outside of slash and the portable filename character set in names for files, users, and groups, one or more implementation-defined encodings of these characters shall be provided for interchange purposes. .LP However, the \fIpax\fP utility shall never create filenames on the local system that cannot be accessed via the procedures described in IEEE\ Std\ 1003.1-2001. If a filename is found on the medium that would create an invalid filename, it is implementation-defined whether the data from the file is stored on the file hierarchy and under what name it is stored. The \fIpax\fP utility may choose to ignore these files as long as it produces an error indicating that the file is being ignored. .LP Each field within the header logical record is contiguous; that is, there is no padding used. Each character on the archive medium shall be stored contiguously. .LP The fields \fImagic\fP, \fIuname\fP, and \fIgname\fP are character strings each terminated by a NUL character. The fields \fIname\fP, \fIlinkname\fP, and \fIprefix\fP are NUL-terminated character strings except when all characters in the array contain non-NUL characters including the last character. The \fIversion\fP field is two octets containing the characters \fB"00"\fP (zero-zero). The \fItypeflag\fP contains a single character. All other fields are leading zero-filled octal numbers using digits from the ISO/IEC\ 646:1991 standard IRV. Each numeric field is terminated by one or more or NUL characters. .LP The \fIname\fP and the \fIprefix\fP fields shall produce the pathname of the file. A new pathname shall be formed, if \fIprefix\fP is not an empty string (its first character is not NUL), by concatenating \fIprefix\fP (up to the first NUL character), a slash character, and \fIname\fP; otherwise, \fIname\fP is used alone. In either case, \fIname\fP is terminated at the first NUL character. If \fIprefix\fP begins with a NUL character, it shall be ignored. In this manner, pathnames of at most 256 characters can be supported. If a pathname does not fit in the space provided, \fIpax\fP shall notify the user of the error, and shall not store any part of the file-header or data-on the medium. .LP The \fIlinkname\fP field, described below, shall not use the \fIprefix\fP to produce a pathname. As such, a \fIlinkname\fP is limited to 100 characters. If the name does not fit in the space provided, \fIpax\fP shall notify the user of the error, and shall not attempt to store the link on the medium. .LP The \fImode\fP field provides 12 bits encoded in the ISO/IEC\ 646:1991 standard octal digit representation. The encoded bits shall represent the following values: .br .sp .RS \fBTable: ustar \fImode\fP Field\fP .TS C center; l1 l1 lw(37). 
\fBBit Value\fP \fBIEEE\ Std\ 1003.1-2001 Bit\fP T{ .na \fBDescription\fP .ad T} 04000 S_ISUID T{ .na Set UID on execution. .ad T} 02000 S_ISGID T{ .na Set GID on execution. .ad T} 01000 T{ .na Reserved for future standardization. .ad T} 00400 S_IRUSR T{ .na Read permission for file owner class. .ad T} 00200 S_IWUSR T{ .na Write permission for file owner class. .ad T} 00100 S_IXUSR T{ .na Execute/search permission for file owner class. .ad T} 00040 S_IRGRP T{ .na Read permission for file group class. .ad T} 00020 S_IWGRP T{ .na Write permission for file group class. .ad T} 00010 S_IXGRP T{ .na Execute/search permission for file group class. .ad T} 00004 S_IROTH T{ .na Read permission for file other class. .ad T} 00002 S_IWOTH T{ .na Write permission for file other class. .ad T} 00001 S_IXOTH T{ .na Execute/search permission for file other class. .ad T} .TE .RE .LP When appropriate privilege is required to set one of these mode bits, and the user restoring the files from the archive does not have the appropriate privilege, the mode bits for which the user does not have appropriate privilege shall be ignored. Some of the mode bits in the archive format are not mentioned elsewhere in this volume of IEEE\ Std\ 1003.1-2001. If the implementation does not support those bits, they may be ignored. .LP The \fIuid\fP and \fIgid\fP fields are the user and group ID of the owner and group of the file, respectively. .LP The \fIsize\fP field is the size of the file in octets. If the \fItypeflag\fP field is set to specify a file to be of type 1 (a link) or 2 (a symbolic link), the \fIsize\fP field shall be specified as zero. If the \fItypeflag\fP field is set to specify a file of type 5 (directory), the \fIsize\fP field shall be interpreted as described under the definition of that record type. No data logical records are stored for types 1, 2, or 5. If the \fItypeflag\fP field is set to 3 (character special file), 4 (block special file), or 6 (FIFO), the meaning of the \fIsize\fP field is unspecified by this volume of IEEE\ Std\ 1003.1-2001, and no data logical records shall be stored on the medium. Additionally, for type 6, the \fIsize\fP field shall be ignored when reading. If the \fItypeflag\fP field is set to any other value, the number of logical records written following the header shall be ( \fIsize\fP+511)/512, ignoring any fraction in the result of the division. .LP The \fImtime\fP field shall be the modification time of the file at the time it was archived. It is the ISO/IEC\ 646:1991 standard representation of the octal value of the modification time obtained from the \fIstat\fP() function. .LP The \fIchksum\fP field shall be the ISO/IEC\ 646:1991 standard IRV representation of the octal value of the simple sum of all octets in the header logical record. Each octet in the header shall be treated as an unsigned value. These values shall be added to an unsigned integer, initialized to zero, the precision of which is not less than 17 bits. When calculating the checksum, the \fIchksum\fP field is treated as if it were all spaces. .LP The \fItypeflag\fP field specifies the type of file archived. If a particular implementation does not recognize the type, or the user does not have appropriate privilege to create that type, the file shall be extracted as if it were a regular file if the file type is defined to have a meaning for the \fIsize\fP field that could cause data logical records to be written on the medium (see the previous description for \fIsize\fP). 
If conversion to a regular file occurs, the \fIpax\fP utility shall produce an error indicating that the conversion took place. All of the \fItypeflag\fP fields shall be coded in the ISO/IEC\ 646:1991 standard IRV: .TP 7 \fB0\fP Represents a regular file. For backwards-compatibility, a \fItypeflag\fP value of binary zero ( \fB'\\0'\fP ) should be recognized as meaning a regular file when extracting files from the archive. Archives written with this version of the archive file format create regular files with a \fItypeflag\fP value of the ISO/IEC\ 646:1991 standard IRV \fB'0'\fP . .TP 7 \fB1\fP Represents a file linked to another file, of any type, previously archived. Such files are identified by each file having the same device and file serial number. The linked-to name is specified in the \fIlinkname\fP field with a NUL-character terminator if it is less than 100 octets in length. .TP 7 \fB2\fP Represents a symbolic link. The contents of the symbolic link shall be stored in the \fIlinkname\fP field. .TP 7 \fB3,4\fP Represent character special files and block special files respectively. In this case the \fIdevmajor\fP and \fIdevminor\fP fields shall contain information defining the device, the format of which is unspecified by this volume of IEEE\ Std\ 1003.1-2001. Implementations may map the device specifications to their own local specification or may ignore the entry. .TP 7 \fB5\fP Specifies a directory or subdirectory. On systems where disk allocation is performed on a directory basis, the \fIsize\fP field shall contain the maximum number of octets (which may be rounded to the nearest disk block allocation unit) that the directory may hold. A \fIsize\fP field of zero indicates no such limiting. Systems that do not support limiting in this manner should ignore the \fIsize\fP field. .TP 7 \fB6\fP Specifies a FIFO special file. Note that the archiving of a FIFO file archives the existence of this file and not its contents. .TP 7 \fB7\fP Reserved to represent a file to which an implementation has associated some high-performance attribute. Implementations without such extensions should treat this file as a regular file (type 0). .TP 7 \fBA-Z\fP The letters \fB'A'\fP to \fB'Z'\fP , inclusive, are reserved for custom implementations. All other values are reserved for future versions of IEEE\ Std\ 1003.1-2001. .sp .LP Attempts to archive a socket using \fBustar\fP interchange format shall produce a diagnostic message. Handling of other file types is implementation-defined. .LP The \fImagic\fP field is the specification that this archive was output in this archive format. If this field contains \fBustar\fP (the five characters from the ISO/IEC\ 646:1991 standard IRV shown followed by NUL), the \fIuname\fP and \fIgname\fP fields shall contain the ISO/IEC\ 646:1991 standard IRV representation of the owner and group of the file, respectively (truncated to fit, if necessary). When the file is restored by a privileged, protection-preserving version of the utility, the user and group databases shall be scanned for these names. If found, the user and group IDs contained within these files shall be used rather than the values contained within the \fIuid\fP and \fIgid\fP fields. .SS cpio Interchange Format .LP The octet-oriented \fBcpio\fP archive format shall be a series of entries, each comprising a header that describes the file, the name of the file, and then the contents of the file. .LP An archive may be recorded as a series of fixed-size blocks of octets. 
This blocking shall be used only to make physical I/O more efficient. The last group of blocks shall always be at the full size. .LP For the octet-oriented \fBcpio\fP archive format, the individual entry information shall be in the order indicated and described by the following table; see also the \fI\fP header. .br .sp .RS \fBTable: Octet-Oriented cpio Archive Entry\fP .TS C center; l2 l2 l. \fBHeader Field Name\fP \fBLength (in Octets)\fP \fBInterpreted as\fP \fIc_magic\fP 6 Octal number \fIc_dev\fP 6 Octal number \fIc_ino\fP 6 Octal number \fIc_mode\fP 6 Octal number \fIc_uid\fP 6 Octal number \fIc_gid\fP 6 Octal number \fIc_nlink\fP 6 Octal number \fIc_rdev\fP 6 Octal number \fIc_mtime\fP 11 Octal number \fIc_namesize\fP 6 Octal number \fIc_filesize\fP 11 Octal number \fBFilename Field Name\fP \fBLength\fP \fBInterpreted as\fP \fIc_name\fP \fIc_namesize\fP Pathname string \fBFile Data Field Name\fP \fBLength\fP \fBInterpreted as\fP \fIc_filedata\fP \fIc_filesize\fP Data .TE .RE .SS cpio Header .LP For each file in the archive, a header as defined previously shall be written. The information in the header fields is written as streams of the ISO/IEC\ 646:1991 standard characters interpreted as octal numbers. The octal numbers shall be extended to the necessary length by appending the ISO/IEC\ 646:1991 standard IRV zeros at the most-significant-digit end of the number; the result is written to the most-significant digit of the stream of octets first. The fields shall be interpreted as follows: .TP 7 \fIc_magic\fP Identify the archive as being a transportable archive by containing the identifying value \fB"070707"\fP . .TP 7 \fIc_dev\fP,\ \fIc_ino\fP Contains values that uniquely identify the file within the archive (that is, no files contain the same pair of \fIc_dev\fP and \fIc_ino\fP values unless they are links to the same file). The values shall be determined in an unspecified manner. .TP 7 \fIc_mode\fP Contains the file type and access permissions as defined in the following table. .br .sp .RS \fBTable: Values for cpio c_mode Field\fP .TS C center; l2 l2 l. \fBFile Permissions Name\fP \fBValue\fP \fBIndicates\fP C_IRUSR 000400 Read by owner C_IWUSR 000200 Write by owner C_IXUSR 000100 Execute by owner C_IRGRP 000040 Read by group C_IWGRP 000020 Write by group C_IXGRP 000010 Execute by group C_IROTH 000004 Read by others C_IWOTH 000002 Write by others C_IXOTH 000001 Execute by others C_ISUID 004000 Set \fIuid\fP C_ISGID 002000 Set \fIgid\fP C_ISVTX 001000 Reserved \fBFile Type Name\fP \fBValue\fP \fBIndicates\fP C_ISDIR 040000 Directory C_ISFIFO 010000 FIFO C_ISREG 0100000 Regular file C_ISLNK 0120000 Symbolic link C_ISBLK 060000 Block special file C_ISCHR 020000 Character special file C_ISSOCK 0140000 Socket C_ISCTG 0110000 Reserved .TE .RE .LP Directories, FIFOs, symbolic links, and regular files shall be supported on a system conforming to this volume of IEEE\ Std\ 1003.1-2001; additional values defined previously are reserved for compatibility with existing systems. Additional file types may be supported; however, such files should not be written to archives intended to be transported to other systems. .TP 7 \fIc_uid\fP Contains the user ID of the owner. .TP 7 \fIc_gid\fP Contains the group ID of the group. .TP 7 \fIc_nlink\fP Contains the number of links referencing the file at the time the archive was created. .TP 7 \fIc_rdev\fP Contains implementation-defined information for character or block special files. 
.TP 7 \fIc_mtime\fP Contains the latest time of modification of the file at the time the archive was created. .TP 7 \fIc_namesize\fP Contains the length of the pathname, including the terminating NUL character. .TP 7 \fIc_filesize\fP Contains the length of the file in octets. This shall be the length of the data section following the header structure. .sp .SS cpio Filename .LP The \fIc_name\fP field shall contain the pathname of the file. The length of this field in octets is the value of \fIc_namesize\fP. .LP If a filename is found on the medium that would create an invalid pathname, it is implementation-defined whether the data from the file is stored on the file hierarchy and under what name it is stored. .LP All characters shall be represented in the ISO/IEC\ 646:1991 standard IRV. For maximum portability between implementations, names should be selected from characters represented by the portable filename character set as octets with the most significant bit zero. If an implementation supports the use of characters outside the portable filename character set in names for files, users, and groups, one or more implementation-defined encodings of these characters shall be provided for interchange purposes. However, the \fIpax\fP utility shall never create filenames on the local system that cannot be accessed via the procedures described previously in this volume of IEEE\ Std\ 1003.1-2001. If a filename is found on the medium that would create an invalid filename, it is implementation-defined whether the data from the file is stored on the local file system and under what name it is stored. The \fIpax\fP utility may choose to ignore these files as long as it produces an error indicating that the file is being ignored. .SS cpio File Data .LP Following \fIc_name\fP, there shall be \fIc_filesize\fP octets of data. Interpretation of such data occurs in a manner dependent on the file. If \fIc_filesize\fP is zero, no data shall be contained in \fIc_filedata\fP. .LP When restoring from an archive: .IP " *" 3 If the user does not have the appropriate privilege to create a file of the specified type, \fIpax\fP shall ignore the entry and write an error message to standard error. .LP .IP " *" 3 Only regular files have data to be restored. Presuming a regular file meets any selection criteria that might be imposed on the format-reading utility by the user, such data shall be restored. .LP .IP " *" 3 If a user does not have appropriate privilege to set a particular mode flag, the flag shall be ignored. Some of the mode flags in the archive format are not mentioned elsewhere in this volume of IEEE\ Std\ 1003.1-2001. If the implementation does not support those flags, they may be ignored. .LP .SS cpio Special Entries .LP FIFO special files, directories, and the trailer shall be recorded with \fIc_filesize\fP equal to zero. For other special files, \fIc_filesize\fP is unspecified by this volume of IEEE\ Std\ 1003.1-2001. The header for the next file entry in the archive shall be written directly after the last octet of the file entry preceding it. A header denoting the filename \fBTRAILER!!!\fP shall indicate the end of the archive; the contents of octets in the last block of the archive following such a header are undefined. .SH EXIT STATUS .LP The following exit values shall be returned: .TP 7 \ 0 All files were processed successfully. .TP 7 >0 An error occurred. 
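.LP
As an informal illustration only (this sketch is not part of this volume of IEEE\ Std\ 1003.1-2001), the octal-ASCII fields described in the cpio Header section above can be decoded with a short Python sketch such as the following. The field names and widths are taken from the Octet-Oriented cpio Archive Entry table; the function name and the assumption that one complete header is available in the string \fIbuf\fP are purely illustrative.
.sp
.RS
.nf
\fB# Sketch: decode one octet-oriented (odc) cpio header.
FIELDS = (("c_magic", 6), ("c_dev", 6), ("c_ino", 6), ("c_mode", 6),
          ("c_uid", 6), ("c_gid", 6), ("c_nlink", 6), ("c_rdev", 6),
          ("c_mtime", 11), ("c_namesize", 6), ("c_filesize", 11))

def read_odc_header(buf):
    "Return (fields, offset of c_name) for the header at the start of buf."
    if not buf.startswith("070707"):
        raise ValueError("c_magic is not 070707; not an odc cpio entry")
    fields, offset = {}, 0
    for name, width in FIELDS:
        fields[name] = int(buf[offset:offset + width], 8)  # zero-padded octal
        offset += width
    # c_name (c_namesize octets, including the terminating NUL) and then
    # c_filesize octets of data follow; a header whose name is TRAILER!!!
    # marks the end of the archive.
    return fields, offset
\fP
.fi
.RE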
.sp .SH CONSEQUENCES OF ERRORS .LP If \fIpax\fP cannot create a file or a link when reading an archive or cannot find a file when writing an archive, or cannot preserve the user ID, group ID, or file mode when the \fB-p\fP option is specified, a diagnostic message shall be written to standard error and a non-zero exit status shall be returned, but processing shall continue. In the case where \fIpax\fP cannot create a link to a file, \fIpax\fP shall not, by default, create a second copy of the file. .LP If the extraction of a file from an archive is prematurely terminated by a signal or error, \fIpax\fP may have only partially extracted the file or (if the \fB-n\fP option was not specified) may have extracted a file of the same name as that specified by the user, but which is not the file the user wanted. Additionally, the file modes of extracted directories may have additional bits from the S_IRWXU mask set as well as incorrect modification and access times. .LP \fIThe following sections are informative.\fP .SH APPLICATION USAGE .LP The \fB-p\fP (privileges) option was invented to reconcile differences between historical \fItar\fP and \fIcpio\fP implementations. In particular, the two utilities use \fB-m\fP in diametrically opposed ways. The \fB-p\fP option also provides a consistent means of extending the ways in which future file attributes can be addressed, such as for enhanced security systems or high-performance files. Although it may seem complex, there are really two modes that are most commonly used: .TP 7 \fB-p\ e\fP ``Preserve everything''. This would be used by the historical superuser, someone with all the appropriate privileges, to preserve all aspects of the files as they are recorded in the archive. The \fBe\fP flag is the sum of \fBo\fP and \fBp\fP, and other implementation-defined attributes. .TP 7 \fB-p\ p\fP ``Preserve'' the file mode bits. This would be used by the user with regular privileges who wished to preserve aspects of the file other than the ownership. The file times are preserved by default, but two other flags are offered to disable these and use the time of extraction. .sp .LP The one pathname per line format of standard input precludes pathnames containing <newline> characters. Although such pathnames violate the portable filename guidelines, they may exist and their presence may inhibit usage of \fIpax\fP within shell scripts. This problem is inherited from historical archive programs. The problem can be avoided by listing filename arguments on the command line instead of on standard input. .LP It is almost certain that appropriate privileges are required for \fIpax\fP to accomplish parts of this volume of IEEE\ Std\ 1003.1-2001. Specifically, creating files of type block special or character special, restoring file access times unless the files are owned by the user (the \fB-t\fP option), or preserving file owner, group, and mode (the \fB-p\fP option) all probably require appropriate privileges. .LP In \fBread\fP mode, implementations are permitted to overwrite files when the archive has multiple members with the same name. This may fail if permissions on the first version of the file do not permit it to be overwritten. .LP The \fBcpio\fP and \fBustar\fP formats can only support files up to 8589934592 bytes (8 * 2^30) in size. .SH EXAMPLES .LP The following command: .sp .RS .nf \fBpax -w -f /dev/rmt/1m .
\fP .fi .RE .LP copies the contents of the current directory to tape drive 1, medium density (assuming historical System V device naming procedures-the historical BSD device name would be \fB/dev/rmt9\fP). .LP The following commands: .sp .RS .nf \fBmkdir\fP \fInewdir\fP\fBpax -rw\fP \fIolddir newdir\fP .fi .RE .LP copy the \fIolddir\fP directory hierarchy to \fInewdir\fP. .sp .RS .nf \fBpax -r -s ',^//*usr//*,,' -f a.pax \fP .fi .RE .LP reads the archive \fBa.pax\fP, with all files rooted in \fB/usr\fP in the archive extracted relative to the current directory. .LP Using the option: .sp .RS .nf \fB-o listopt="%M %(atime)T %(size)D %(name)s" \fP .fi .RE .LP overrides the default output description in Standard Output and instead writes: .sp .RS .nf \fB-rw-rw--- Jan 12 15:53 1492 /usr/foo/bar \fP .fi .RE .LP Using the options: .sp .RS .nf \fB-o listopt='%L\\t%(size)D\\n%.7' \\ -o listopt='(name)s\\n%(ctime)T\\n%T' \fP .fi .RE .LP overrides the default output description in Standard Output and instead writes: .sp .RS .nf \fB/usr/foo/bar -> /tmp 1492 /usr/fo Jan 12 1991 Jan 31 15:53 \fP .fi .RE .SH RATIONALE .LP The \fIpax\fP utility was new for the ISO\ POSIX-2:1993 standard. It represents a peaceful compromise between advocates of the historical \fItar\fP and \fIcpio\fP utilities. .LP A fundamental difference between \fIcpio\fP and \fItar\fP was in the way directories were treated. The \fIcpio\fP utility did not treat directories differently from other files, and to select a directory and its contents required that each file in the hierarchy be explicitly specified. For \fItar\fP, a directory matched every file in the file hierarchy it rooted. .LP The \fIpax\fP utility offers both interfaces; by default, directories map into the file hierarchy they root. The \fB-d\fP option causes \fIpax\fP to skip any file not explicitly referenced, as \fIcpio\fP historically did. The \fItar\fP \fB-\fP \fIstyle\fP behavior was chosen as the default because it was believed that this was the more common usage and because \fItar\fP is the more commonly available interface, as it was historically provided on both System V and BSD implementations. .LP The data interchange format specification in this volume of IEEE\ Std\ 1003.1-2001 requires that processes with "appropriate privileges" shall always restore the ownership and permissions of extracted files exactly as archived. If viewed from the historic equivalence between superuser and "appropriate privileges", there are two problems with this requirement. First, users running as superusers may unknowingly set dangerous permissions on extracted files. Second, it is needlessly limiting, in that superusers cannot extract files and own them as superuser unless the archive was created by the superuser. (It should be noted that restoration of ownerships and permissions for the superuser, by default, is historical practice in \fIcpio\fP, but not in \fItar\fP.) In order to avoid these two problems, the \fIpax\fP specification has an additional "privilege" mechanism, the \fB-p\fP option. Only a \fIpax\fP invocation with the privileges needed, and which has the \fB-p\fP option set using the \fBe\fP specification character, has the "appropriate privilege" to restore full ownership and permission information. .LP Note also that this volume of IEEE\ Std\ 1003.1-2001 requires that the file ownership and access permissions shall be set, on extraction, in the same fashion as the \fIcreat\fP() function when provided with the mode stored in the archive. 
This means that the file creation mask of the user is applied to the file permissions. .LP Users should note that directories may be created by \fIpax\fP while extracting files with permissions that are different from those that existed at the time the archive was created. When extracting sensitive information into a directory hierarchy that no longer exists, users are encouraged to set their file creation mask appropriately to protect these files during extraction. .LP The table of contents output is written to standard output to facilitate pipeline processing. .LP An early proposal had hard links displaying for all pathnames. This was removed because it complicates the output of the case where \fB-v\fP is not specified and does not match historical \fIcpio\fP usage. The hard-link information is available in the \fB-v\fP display. .LP The description of the \fB-l\fP option allows implementations to make hard links to symbolic links. IEEE\ Std\ 1003.1-2001 does not specify any way to create a hard link to a symbolic link, but many implementations provide this capability as an extension. If there are hard links to symbolic links when an archive is created, the implementation is required to archive the hard link in the archive (unless \fB-H\fP or \fB-L\fP is specified). When in \fBread\fP mode and in \fBcopy\fP mode, implementations supporting hard links to symbolic links should use them when appropriate. .LP The archive formats inherited from the POSIX.1-1990 standard have certain restrictions that have been brought along from historical usage. For example, there are restrictions on the length of pathnames stored in the archive. When \fIpax\fP is used in \fBcopy\fP( \fB-rw\fP) mode (copying directory hierarchies), the ability to use extensions from the \fB-x\fP \fBpax\fP format overcomes these restrictions. .LP The default \fIblocksize\fP value of 5120 bytes for \fIcpio\fP was selected because it is one of the standard block-size values for \fIcpio\fP, set when the \fB-B\fP option is specified. (The other default block-size value for \fIcpio\fP is 512 bytes, and this was considered to be too small.) The default block value of 10240 bytes for \fItar\fP was selected because that is the standard block-size value for BSD \fItar\fP. The maximum block size of 32256 bytes (2**15-512 bytes) is the largest multiple of 512 bytes that fits into a signed 16-bit tape controller transfer register. There are known limitations in some historical systems that would prevent larger blocks from being accepted. Historical values were chosen to improve compatibility with historical scripts using \fIdd\fP or similar utilities to manipulate archives. Also, default block sizes for any file type other than character special file has been deleted from this volume of IEEE\ Std\ 1003.1-2001 as unimportant and not likely to affect the structure of the resulting archive. .LP Implementations are permitted to modify the block-size value based on the archive format or the device to which the archive is being written. This is to provide implementations with the opportunity to take advantage of special types of devices, and it should not be used without a great deal of consideration as it almost certainly decreases archive portability. .LP The intended use of the \fB-n\fP option was to permit extraction of one or more files from the archive without processing the entire archive. This was viewed by the standard developers as offering significant performance advantages over historical implementations. 
The \fB-n\fP option in early proposals had three effects; the first was to cause special characters in patterns to not be treated specially. The second was to cause only the first file that matched a pattern to be extracted. The third was to cause \fIpax\fP to write a diagnostic message to standard error when no file was found matching a specified pattern. Only the second behavior is retained by this volume of IEEE\ Std\ 1003.1-2001, for many reasons. First, it is in general not acceptable for a single option to have multiple effects. Second, the ability to make pattern matching characters act as normal characters is useful for parts of \fIpax\fP other than file extraction. Third, a finer degree of control over the special characters is useful because users may wish to normalize only a single special character in a single filename. Fourth, given a more general escape mechanism, the previous behavior of the \fB-n\fP option can be easily obtained using the \fB-s\fP option or a \fIsed\fP script. Finally, writing a diagnostic message when a pattern specified by the user is unmatched by any file is useful behavior in all cases. .LP In this version, the \fB-n\fP was removed from the \fBcopy\fP mode synopsis of \fIpax\fP; it is inapplicable because there are no pattern operands specified in this mode. .LP There is another method than \fIpax\fP for copying subtrees in IEEE\ Std\ 1003.1-2001 described as part of the \fIcp\fP utility. Both methods are historical practice: \fIcp\fP provides a simpler, more intuitive interface, while \fIpax\fP offers a finer granularity of control. Each provides additional functionality to the other; in particular, \fIpax\fP maintains the hard-link structure of the hierarchy while \fIcp\fP does not. It is the intention of the standard developers that the results be similar (using appropriate option combinations in both utilities). The results are not required to be identical; there seemed insufficient gain to applications to balance the difficulty of implementations having to guarantee that the results would be exactly identical. .LP A single archive may span more than one file. It is suggested that implementations provide informative messages to the user on standard error whenever the archive file is changed. .LP The \fB-d\fP option (do not create intermediate directories not listed in the archive) found in early proposals was originally provided as a complement to the historic \fB-d\fP option of \fIcpio\fP. It has been deleted. .LP The \fB-s\fP option in early proposals specified a subset of the substitution command from the \fIed\fP utility. As there was no reason for only a subset to be supported, the \fB-s\fP option is now compatible with the current \fIed\fP specification. Since the delimiter can be any non-null character, the following usage with single spaces is valid: .sp .RS .nf \fBpax -s " foo bar " ... \fP .fi .RE .LP The \fB-t\fP description is worded so as to note that this may cause the access time update caused by some other activity (which occurs while the file is being read) to be overwritten. .LP The default behavior of \fIpax\fP with regard to file modification times is the same as historical implementations of \fItar\fP. It is not the historical behavior of \fIcpio\fP. .LP Because the \fB-i\fP option uses \fB/dev/tty\fP, utilities without a controlling terminal are not able to use this option. 
.LP The \fB-y\fP option, found in early proposals, has been deleted because a line containing a single period for the \fB-i\fP option has equivalent functionality. The special lines for the \fB-i\fP option (a single period and the empty line) are historical practice in \fIcpio\fP. .LP In early drafts, a \fB-e\fP \fIcharmap\fP option was included to increase portability of files between systems using different coded character sets. This option was omitted because it was apparent that consensus could not be formed for it. In this version, the use of UTF-8 should be an adequate substitute. .LP The \fB-k\fP option was added to address international concerns about the dangers involved in the character set transformations of \fB-e\fP (if the target character set were different from the source, the filenames might be transformed into names matching existing files) and also was made more general to protect files transferred between file systems with different {NAME_MAX} values (truncating a filename on a smaller system might also inadvertently overwrite existing files). As stated, it prevents any overwriting, even if the target file is older than the source. This version adds more granularity of options to solve this problem by introducing the \fB-o\fP \fBinvalid=\fP option-specifically the UTF-8 action. (Note that an existing file that is named with a UTF-8 encoding is still subject to overwriting in this case. The \fB-k\fP option closes that loophole.) .LP Some of the file characteristics referenced in this volume of IEEE\ Std\ 1003.1-2001 might not be supported by some archive formats. For example, neither the \fBtar\fP nor \fBcpio\fP formats contain the file access time. For this reason, the \fBe\fP specification character has been provided, intended to cause all file characteristics specified in the archive to be retained. .LP It is required that extracted directories, by default, have their access and modification times and permissions set to the values specified in the archive. This has obvious problems in that the directories are almost certainly modified after being extracted and that directory permissions may not permit file creation. One possible solution is to create directories with the mode specified in the archive, as modified by the \fIumask\fP of the user, with sufficient permissions to allow file creation. After all files have been extracted, \fIpax\fP would then reset the access and modification times and permissions as necessary. .LP The list-mode formatting description borrows heavily from the one defined by the \fIprintf\fP utility. However, since there is no separate operand list to get conversion arguments, the format was extended to allow specifying the name of the conversion argument as part of the conversion specification. .LP The \fBT\fP conversion specifier allows time fields to be displayed in any of the date formats. Unlike the \fIls\fP utility, \fIpax\fP does not adjust the format when the date is less than six months in the past. This makes parsing the output more predictable. .LP The \fBD\fP conversion specifier handles the ability to display the major/minor or file size, as with \fIls\fP, by using \fB%-8(\fP\fIsize\fP\fB)D\fP. .LP The \fBL\fP conversion specifier handles the \fIls\fP display for symbolic links. .LP Conversion specifiers were added to generate existing known types used for \fIls\fP. 
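.LP
As an informal illustration only (not part of IEEE\ Std\ 1003.1-2001), the idea of naming the conversion argument inside the conversion specification can be sketched in a few lines of Python. The default-argument mapping and the fixed date format below are assumptions made for this example, and width or precision modifiers such as \fB%-8(size)D\fP are not handled; this is not the normative \fIpax\fP algorithm.
.sp
.RS
.nf
\fBimport re, time

# Sketch: expand listopt-style conversions such as "%(size)D" against a
# dictionary of file attributes.
SPEC = re.compile(r"%(?:[(](?P<arg>[^)]+)[)])?(?P<conv>[sDTLM])")
DEFAULT_ARG = {"s": "name", "D": "size", "T": "mtime", "L": "name", "M": "mode"}

def expand(fmt, attrs):
    def repl(match):
        conv = match.group("conv")
        value = attrs[match.group("arg") or DEFAULT_ARG[conv]]
        if conv == "T":    # time fields use one fixed, ls-like date format
            return time.strftime("%b %d %H:%M", time.localtime(value))
        return str(value)
    return SPEC.sub(repl, fmt)

print(expand("%M %(atime)T %(size)D %(name)s",
             {"mode": "-rw-rw----", "atime": 663724380,
              "size": 1492, "name": "/usr/foo/bar"}))
\fP
.fi
.RE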
.SS pax Interchange Format .LP The new POSIX data interchange format was developed primarily to satisfy international concerns that the \fBustar\fP and \fBcpio\fP formats did not provide for file, user, and group names encoded in characters outside a subset of the ISO/IEC\ 646:1991 standard. The standard developers realized that this new POSIX data interchange format should be very extensible because there were other requirements they foresaw in the near future: .IP " *" 3 Support international character encodings and locale information .LP .IP " *" 3 Support security information (ACLs, and so on) .LP .IP " *" 3 Support future file types, such as realtime or contiguous files .LP .IP " *" 3 Include data areas for implementation use .LP .IP " *" 3 Support systems with words larger than 32 bits and timers with subsecond granularity .LP .LP The following were not goals for this format because these are better handled by separate utilities or are inappropriate for a portable format: .IP " *" 3 Encryption .LP .IP " *" 3 Compression .LP .IP " *" 3 Data translation between locales and codesets .LP .IP " *" 3 \fIinode\fP storage .LP .LP The format chosen to support the goals is an extension of the \fBustar\fP format. Of the two formats previously available, only the \fBustar\fP format was selected for extensions because: .IP " *" 3 It was easier to extend in an upwards-compatible way. It offered version flags and header block type fields with room for future standardization. The \fBcpio\fP format, while possessing a more flexible file naming methodology, could not be extended without breaking some theoretical implementation or using a dummy filename that could be a legitimate filename. .LP .IP " *" 3 Industry experience since the original " \fItar\fP wars" fought in developing the ISO\ POSIX-1 standard has clearly been in favor of the \fBustar\fP format, which is generally the default output format selected for \fIpax\fP implementations on new systems. .LP .LP The new format was designed with one additional goal in mind: reasonable behavior when an older \fItar\fP or \fIpax\fP utility happened to read an archive. Since the POSIX.1-1990 standard mandated that a "format-reading utility" had to treat unrecognized \fItypeflag\fP values as regular files, this allowed the format to include all the extended information in a pseudo-regular file that preceded each real file. An option is given that allows the archive creator to set up reasonable names for these files on the older systems. Also, the normative text suggests that reasonable file access values be used for this \fBustar\fP header block. Making these header files inaccessible for convenient reading and deleting would not be reasonable. File permissions of 600 or 700 are suggested. .LP The \fBustar\fP \fItypeflag\fP field was used to accommodate the additional functionality of the new format rather than magic or version because the POSIX.1-1990 standard (and, by reference, the previous version of \fIpax\fP), mandated the behavior of the format-reading utility when it encountered an unknown \fItypeflag\fP, but was silent about the other two fields. .LP Early proposals of the first revision to IEEE\ Std\ 1003.1-2001 contained a proposed archive format that was based on compatibility with the standard for tape files (ISO\ 1001, similar to the format used historically on many mainframes and minicomputers). This format was overly complex and required considerable overhead in volume and header records. 
Furthermore, the standard developers felt that it would not be acceptable to the community of POSIX developers, so it was later changed to be a format more closely related to historical practice on POSIX systems. .LP The prefix and name split of pathnames in \fBustar\fP was replaced by the single path extended header record for simplicity. .LP The concept of a global extended header ( \fItypeflag\fP \fBg\fP) was controversial. If this were applied to an archive being recorded on magnetic tape, a few unreadable blocks at the beginning of the tape could be a serious problem; a utility attempting to extract as many files as possible from a damaged archive could lose a large percentage of file header information in this case. However, if the archive were on a reliable medium, such as a CD-ROM, the global extended header offers considerable potential size reductions by eliminating redundant information. Thus, the text warns against using the global method for unreliable media and provides a method for implanting global information in the extended header for each file, rather than in the \fItypeflag\fP \fBg\fP records. .LP No facility for data translation or filtering on a per-file basis is included because the standard developers could not invent an interface that would allow this in an efficient manner. If a filter, such as encryption or compression, is to be applied to all the files, it is more efficient to apply the filter to the entire archive as a single file. The standard developers considered interfaces that would invoke a shell script for each file going into or out of the archive, but the system overhead in this approach was considered to be too high. .LP One such approach would be to have \fBfilter=\fP records that give a pathname for an executable. When the program is invoked, the file and archive would be open for standard input/output and all the header fields would be available as environment variables or command-line arguments. The standard developers did discuss such schemes, but they were omitted from IEEE\ Std\ 1003.1-2001 due to concerns about excessive overhead. Also, the program itself would need to be in the archive if it were to be used portably. .LP There is currently no portable means of identifying the character set(s) used for a file in the file system. Therefore, \fIpax\fP has not been given a mechanism to generate charset records automatically. The only portable means of doing this is for the user to write the archive using the \fB-o\fP \fBcharset=\fP \fIstring\fP command line option. This assumes that all of the files in the archive use the same encoding. The "implementation-defined" text is included to allow for a system that can identify the encodings used for each of its files. .LP The table of standards that accompanies the charset record description is acknowledged to be very limited. Only a limited number of character set standards is reasonable for maximal interchange. Any character set is, of course, possible by prior agreement. It was suggested that EBCDIC be listed, but it was omitted because it is not defined by a formal standard. Formal standards, and then only those with reasonably large followings, can be included here, simply as a matter of practicality. The <\fIvalue\fP>s represent names of officially registered character sets in the format required by the ISO\ 2375:1985 standard. .LP The normal comma or <blank>-separated list rules are not followed in the case of keyword options to allow ease of argument parsing for \fIgetopts\fP.
.LP Further information on character encodings is in pax Archive Character Set Encoding/Decoding \&. .LP The standard developers have reserved keyword name space for vendor extensions. It is suggested that the format to be used is: .sp .RS .nf \fIVENDOR.keyword\fP .fi .RE .LP where \fIVENDOR\fP is the name of the vendor or organization in all uppercase letters. It is further suggested that the keyword following the period be named differently than any of the standard keywords so that it could be used for future standardization, if appropriate, by omitting the \fIVENDOR\fP prefix. .LP The <\fIlength\fP> field in the extended header record was included to make it simpler to step through the records, even if a record contains an unknown format (to a particular \fIpax\fP) with complex interactions of special characters. It also provides a minor integrity checkpoint within the records to aid a program attempting to recover files from a damaged archive. .LP There are no extended header versions of the \fIdevmajor\fP and \fIdevminor\fP fields because the unspecified format \fBustar\fP header field should be sufficient. If they are not, vendor-specific extended keywords (such as \fIVENDOR.devmajor\fP) should be used. .LP Device and \fIi\fP-number labeling of files was not adopted from \fIcpio\fP; files are interchanged strictly on a symbolic name basis, as in \fBustar\fP. .LP Just as with the \fBustar\fP format descriptions, the new format makes no special arrangements for multi-volume archives. Each of the \fIpax\fP archive types is assumed to be inside a single POSIX file and splitting that file over multiple volumes (diskettes, tape cartridges, and so on), processing their labels, and mounting each in the proper sequence are considered to be implementation details that cannot be described portably. .LP The \fBpax\fP format is intended for interchange, not only for backup on a single (family of) systems. It is not as densely packed as might be possible for backup: .IP " *" 3 It contains information as coded characters that could be coded in binary. .LP .IP " *" 3 It identifies extended records with name fields that could be omitted in favor of a fixed-field layout. .LP .IP " *" 3 It translates names into a portable character set and identifies locale-related information, both of which are probably unnecessary for backup. .LP .LP The requirements on restoring from an archive are slightly different from the historical wording, allowing for non-monolithic privilege to bring forward as much as possible. In particular, attributes such as "high performance file" might be broadly but not universally granted while set-user-ID or \fIchown\fP() might be much more restricted. There is no implication in IEEE\ Std\ 1003.1-2001 that the security information be honored after it is restored to the file hierarchy, in spite of what might be improperly inferred by the silence on that topic. That is a topic for another standard. .LP Links are recorded in the fashion described here because a link can be to any file type. It is desirable in general to be able to restore part of an archive selectively and restore all of those files completely. If the data is not associated with each link, it is not possible to do this. However, the data associated with a file can be large, and when selective restoration is not needed, this can be a significant burden. 
The archive is structured so that files that have no associated data can always be restored by the name of any link, and the user may choose whether data is recorded with each instance of a file that contains data. The format permits mixing of both types of links in a single archive; this can be done for special needs, and \fIpax\fP is expected to interpret such archives on input properly, despite the fact that there is no \fIpax\fP option that would force this mixed case on output. (When \fB-o\fP \fBlinkdata\fP is used, the output must contain the duplicate data, but the implementation is free to include it or omit it when \fB-o\fP \fBlinkdata\fP is not used.) .LP The time values are included as extended header records for those implementations needing more than the eleven octal digits allowed by the \fBustar\fP format. Portable file timestamps cannot be negative. If \fIpax\fP encounters a file with a negative timestamp in \fBcopy\fP or \fBwrite\fP mode, it can reject the file, substitute a non-negative timestamp, or generate a non-portable timestamp with a leading \fB'-'\fP . Even though some implementations can support finer file-time granularities than seconds, the normative text requires support only for seconds since the Epoch because the ISO\ POSIX-1 standard states them that way. The \fBustar\fP format includes only \fImtime\fP; the new format adds \fIatime\fP and \fIctime\fP for symmetry. The \fIatime\fP access time restored to the file system will be affected by the \fB-p\fP \fBa\fP and \fB-p\fP \fBe\fP options. The \fIctime\fP creation time (actually \fIinode\fP modification time) is described with "appropriate privilege" so that it can be ignored when writing to the file system. POSIX does not provide a portable means to change file creation time. Nothing is intended to prevent a non-portable implementation of \fIpax\fP from restoring the value. .LP The \fIgid\fP, \fIsize\fP, and \fIuid\fP extended header records were included to allow expansion beyond the sizes specified in the regular \fItar\fP header. New file system architectures are emerging that will exhaust the 12-digit size field. There are probably not many systems requiring more than 8 digits for user and group IDs, but the extended header values were included for completeness, allowing overrides for all of the decimal values in the \fItar\fP header. .LP The standard developers intended to describe the effective results of \fIpax\fP with regard to file ownerships and permissions; implementations are not restricted in timing or sequencing the restoration of such, provided the results are as specified. .LP Much of the text describing the extended headers refers to use in " \fBwrite\fP or \fBcopy\fP modes". The \fBcopy\fP mode references are due to the normative text: "The effect of the copy shall be as if the copied files were written to an archive file and then subsequently extracted ...". There is certainly no way to test whether \fIpax\fP is actually generating the extended headers in \fBcopy\fP mode, but the effects must be as if it had. .SS pax Archive Character Set Encoding/Decoding .LP There is a need to exchange archives of files between systems of different native codesets. Filenames, group names, and user names must be preserved to the fullest extent possible when an archive is read on the receiving platform. Translation of the contents of files is not within the scope of the \fIpax\fP utility.
.LP There will also be the need to represent characters that are not available on the receiving platform. These unsupported characters cannot be automatically folded to the local set of characters due to the chance of collisions. This could result in overwriting previous extracted files from the archive or pre-existing files on the system. .LP For these reasons, the codeset used to represent characters within the extended header records of the \fIpax\fP archive must be sufficiently rich to handle all commonly used character sets. The fields requiring translation include, at a minimum, filenames, user names, group names, and link pathnames. Implementations may wish to have localized extended keywords that use non-portable characters. .LP The standard developers considered the following options: .IP " *" 3 The archive creator specifies the well-defined name of the source codeset. The receiver must then recognize the codeset name and perform the appropriate translations to the destination codeset. .LP .IP " *" 3 The archive creator includes within the archive the character mapping table for the source codeset used to encode extended header records. The receiver must then read the character mapping table and perform the appropriate translations to the destination codeset. .LP .IP " *" 3 The archive creator translates the extended header records in the source codeset into a canonical form. The receiver must then perform the appropriate translations to the destination codeset. .LP .LP The approach that incorporates the name of the source codeset poses the problem of codeset name registration, and makes the archive useless to \fIpax\fP archive decoders that do not recognize that codeset. .LP Because parts of an archive may be corrupted, the standard developers felt that including the character map of the source codeset was too fragile. The loss of this one key component could result in making the entire archive useless. (The difference between this and the global extended header decision was that the latter has a workaround-duplicating extended header records on unreliable media-but this would be too burdensome for large character set maps.) .LP Both of the above approaches also put an undue burden on the \fIpax\fP archive receiver to handle the cross-product of all source and destination codesets. .LP To simplify the translation from the source codeset to the canonical form and from the canonical form to the destination codeset, the standard developers decided that the internal representation should be a stateless encoding. A stateless encoding is one where each codepoint has the same meaning, without regard to the decoder being in a specific state. An example of a stateful encoding would be the Japanese Shift-JIS; an example of a stateless encoding would be the ISO/IEC\ 646:1991 standard (equivalent to 7-bit ASCII). .LP For these reasons, the standard developers decided to adopt a canonical format for the representation of file information strings. The obvious, well-endorsed candidate is the ISO/IEC\ 10646-1:2000 standard (based in part on Unicode), which can be used to represent the characters of virtually all standardized character sets. The standard developers initially agreed upon using UCS2 (16-bit Unicode) as the internal representation. This repertoire of characters provides a sufficiently rich set to represent all commonly-used codesets. .LP However, the standard developers found that the 16-bit Unicode representation had some problems. 
It forced the issue of standardizing byte ordering. The 2-byte length of each character made the extended header records twice as long for the case of strings coded entirely from historical 7-bit ASCII. For these reasons, the standard developers chose the UTF-8 defined in the ISO/IEC\ 10646-1:2000 standard. This multi-byte representation encodes UCS2 or UCS4 characters reliably and deterministically, eliminating the need for a canonical byte ordering. In addition, NUL octets and other characters possibly confusing to POSIX file systems do not appear, except to represent themselves. It was realized that certain national codesets take up more space after the encoding, due to their placement within the UCS range; it was felt that the usefulness of the encoding of the names outweighs the disadvantage of size increase for file, user, and group names. .LP The encoding of UTF-8 is as follows: .sp .RS .nf \fBUCS4 Hex Encoding UTF-8 Binary Encoding .sp 00000000-0000007F 0xxxxxxx 00000080-000007FF 110xxxxx 10xxxxxx 00000800-0000FFFF 1110xxxx 10xxxxxx 10xxxxxx 00010000-001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 00200000-03FFFFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 04000000-7FFFFFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx \fP .fi .RE .LP where each \fB'x'\fP represents a bit value from the character being translated. .SS ustar Interchange Format .LP The description of the \fBustar\fP format reflects numerous enhancements over pre-1988 versions of the historical \fItar\fP utility. The goal of these changes was not only to provide the functional enhancements desired, but also to retain compatibility between new and old versions. This compatibility has been retained. Archives written using the old archive format are compatible with the new format. .LP Implementors should be aware that the previous file format did not include a mechanism to archive directory type files. For this reason, the convention of using a filename ending with slash was adopted to specify a directory on the archive. .LP The total size of the \fIname\fP and \fIprefix\fP fields have been set to meet the minimum requirements for {PATH_MAX}. If a pathname will fit within the \fIname\fP field, it is recommended that the pathname be stored there without the use of the \fIprefix\fP field. Although the name field is known to be too small to contain {PATH_MAX} characters, the value was not changed in this version of the archive file format to retain backwards-compatibility, and instead the prefix was introduced. Also, because of the earlier version of the format, there is no way to remove the restriction on the \fIlinkname\fP field being limited in size to just that of the \fIname\fP field. .LP The \fIsize\fP field is required to be meaningful in all implementation extensions, although it could be zero. This is required so that the data blocks can always be properly counted. .LP It is suggested that if device special files need to be represented that cannot be represented in the standard format, that one of the extension types ( \fBA\fP- \fBZ\fP) be used, and that the additional information for the special file be represented as data and be reflected in the \fIsize\fP field. .LP Attempting to restore a special file type, where it is converted to ordinary data and conflicts with an existing filename, need not be specially detected by the utility. 
If run as an ordinary user, \fIpax\fP should not be able to overwrite the entries in, for example, \fB/dev\fP in any case (whether the file is converted to another type or not). If run as a privileged user, it should be able to do so, and it would be considered a bug if it did not. The same is true of ordinary data files and similarly named special files; it is impossible to anticipate the needs of the user (who could really intend to overwrite the file), so the behavior should be predictable (and thus regular) and rely on the protection system as required. .LP The value 7 in the \fItypeflag\fP field is intended to define how contiguous files can be stored in a \fBustar\fP archive. IEEE\ Std\ 1003.1-2001 does not require the contiguous file extension, but does define a standard way of archiving such files so that all conforming systems can interpret these file types in a meaningful and consistent manner. On a system that does not support extended file types, the \fIpax\fP utility should do the best it can with the file and go on to the next. .LP The file protection modes are those conventionally used by the \fIls\fP utility. This is extended beyond the usage in the ISO\ POSIX-2 standard to support the "shared text" or "sticky" bit. It is intended that the conformance document should not document anything beyond the existence of and support of such a mode. Further extensions are expected to these bits, particularly with overloading the set-user-ID and set-group-ID flags. .SS cpio Interchange Format .LP The reference to appropriate privilege in the \fBcpio\fP format refers to an error on standard output; the \fBustar\fP format does not make comparable statements. .LP The model for this format was the historical System V \fIcpio\fP \fB-c\fP data interchange format. This model documents the portable version of the \fBcpio\fP format and not the binary version. It has the flexibility to transfer data of any type described within IEEE\ Std\ 1003.1-2001, yet is extensible to transfer data types specific to extensions beyond IEEE\ Std\ 1003.1-2001 (for example, contiguous files). Because it describes existing practice, there is no question of maintaining upwards-compatibility. .SS cpio Header .LP There has been some concern that the size of the \fIc_ino\fP field of the header is too small to handle those systems that have very large \fIinode\fP numbers. However, the \fIc_ino\fP field in the header is used strictly as a hard-link resolution mechanism for archives. It is not necessarily the same value as the \fIinode\fP number of the file in the location from which that file is extracted. .LP The name \fIc_magic\fP is based on historical usage. .SS cpio Filename .LP For most historical implementations of the \fIcpio\fP utility, {PATH_MAX} octets can be used to describe the pathname without the addition of any other header fields (the NUL character would be included in this count). {PATH_MAX} is the minimum value for pathname size, documented as 256 bytes. However, an implementation may use \fIc_namesize\fP to determine the exact length of the pathname. With the current description of the \fI\fP header, this pathname size can be as large as a number that is described in six octal digits. .LP Two values are documented under the \fIc_mode\fP field values to provide for extensibility for known file types: .TP 7 \fB0110\ 000\fP Reserved for contiguous files. The implementation may treat the rest of the information for this archive like a regular file. 
If this file type is undefined, the implementation may create the file as a regular file. .sp .LP This provides for extensibility of the \fBcpio\fP format while allowing for the ability to read old archives. Files of an unknown type may be read as "regular files" on some implementations. On a system that does not support extended file types, the \fIpax\fP utility should do the best it can with the file and go on to the next. .SH FUTURE DIRECTIONS .LP None. .SH SEE ALSO .LP \fIShell Command Language\fP , \fIcp\fP , \fIed\fP , \fIgetopts\fP , \fIls\fP , \fIprintf\fP() , the Base Definitions volume of IEEE\ Std\ 1003.1-2001, \fI\fP, the System Interfaces volume of IEEE\ Std\ 1003.1-2001, \fIchown\fP(), \fIcreat\fP(), \fImkdir\fP(), \fImkfifo\fP(), \fIstat\fP(), \fIutime\fP(), \fIwrite\fP() .SH COPYRIGHT Portions of this text are reprinted and reproduced in electronic form from IEEE Std 1003.1, 2003 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2003 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between this version and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html . doclifter-2.11/tests/grap.man0000664000175000017500000010474212152465736014371 0ustar esresr.\"-*-nroff-*- .\" This file is (c) 1998-2006 Ted Faber (faber@lunabase.org) see .\" COPYRIGHT for the full copyright and limitations of liabilities. .Dd March 11, 2006 .Os .Dt GRAP 1 .Sh NAME .Nm grap .Nd Kernighan and Bentley's language for typesetting graphs .Sh SYNOPSIS .Nm .Op Fl d Ar defines_file .Op Fl D .Op Fl l .Op Fl M Ar include path .Op Fl R .Op Fl r .Op Fl v .Op Fl u .Op Fl C .Op Fl c .Op Fl h .Op Ar filename ... .Sh DESCRIPTION .Nm is an implementation of Kernighan and Bentley's language for typesetting graphs, as described in ``Grap-A Language for Typesetting Graphs, Tutorial and User Manual,'' by Jon L. Bentley and Brian W. Kernighan, revised May 1991, which is the primary source for information on how to use .Nm grap . As of this writing, it is available electronically at .Li http://www.kohala.com/start/troff/cstr114.ps . Additional documentation and examples, packaged with .Nm , may have been installed locally as well. If available, paths to them can be displayed using .Nm .Fl h or .Nm .Fl v (or .Nm .Fl -help / .Nm .Fl -version ) .Pp This version is a black box implementation of .Nm grap , and some inconsistencies are to be expected. The remainder of this manual page will briefly outline the .Nm language as implemented here. .Pp .Nm is a .Xr pic 1 pre-processor. It takes commands embedded in a .Xr troff 1 source file which are surrounded by .Ic .G1 and .Ic .G2 macros, and rewrites them into .Xr pic commands to display the graph. Other lines are copied. Output is always to the standard output, which is usually redirected. Input is from the given .Ar filename Ns No s , which are read in order. A .Ar filename of .Fl is the standard input. If no .Ar filename Ns No s are given, input is read from the standard input. .Pp Because .Nm is a .Xr pic preprocessor, and GNU .Xr pic will output TeX, it is possible to use .Nm with TeX. .Pp The .Fl d option specifies a file of macro definitions to be read at startup, and defaults to /usr/local/share/grap/grap.defines . 
The .Fl D option inhibits the reading of any initial macros file (the .Fl l flag is a synonym for .Fl D , though I do not remember why). The defines file can also be given using the .Ev GRAP_DEFINES environment variable. (See below). .Pp .Fl v prints the version information on the standard output and exits. .Fl -version is a synonym for .Fl v . .Pp .Fl u makes labels unaligned by default. This version of .Nm uses new features of GNU .Xr pic to align the left and right labels with the axes, that is, the left and right labels run at right angles to the text of the paper. This may be useful in porting old .Nm programs. .Fl c makes plot strings unclipped by default. Some versions of .Nm allow users to place a string anywhere in the coordinate space, rather than only in the frame. By default this version of .Nm does not plot any string centered outside the frame. .Fl c allows strings to be placed anywhere. See also the .Ic clipped and .Ic unclipped string modifiers described in the .Ic plot statement. .Pp .Fl M is followed by a colon-separated list of directories used to search for relative pathnames included via .Ic copy . The path is also used to locate the defines file, so if the .Fl d changes the defines file name to a relative name, it will be searched for in the path given by .Fl M . The search path always includes the current directory, and by default that directory is searched last. .Pp All numbers used internally by .Nm are double precision floating point values. Sometimes using floating point numbers has unintended consequences. To help avoid these problems, .Nm can use two thresholds for comparison of floating point numbers, set by .Fl R or .Fl r . The .Fl R flag sets coarse comparison mode, which is suitable for most applications. If you are plotting small values \(en less than 1e-6 or so \(en consider using .Fl r which uses very fine comparisons between numbers. You may also want to rescale your plotted values to be larger in magnitude. The coarse comparisons are used by default. .Pp To be precise, the value by which two numbers must differ for .Nm to consider them not equal is called the comparison limit and the smallest non-zero number is called the minimum value. The values a given version of .Nm uses for these are included in the output of .Fl v or .Fl h . .Pp All .Nm commands are included between .Ic .G1 and .Ic .G2 macros, which are consumed by .Nm grap . The output contains .Xr pic between .Ic .PS and .Ic .PE macros. Any arguments to the .Ic .G1 macro in the input are arguments to the .Ic .PS macro in the output, so graphs can be scaled just like .Xr pic diagrams. If .Fl C is given, any macro beginning with \&.G1 or \&.G2 is treated as a \&.G1 or \&.G2 macro, for compatibility with old versions of troff. Using .Fl C also forces pure troff syntax on embedded font change commands when strings have the .Ic size attribute, and all strings to be .Ic unclipped . .Pp The .Fl h flag prints a brief help message and exits. .Fl -help is a synonym for .Fl h . .Pp It is possible for someone to cause .Nm to fail by passing a bad format string and data to the .Ic sprintf command. If .Nm is integrated as part of the printing system, this could conceivably provide a path to breaching security on the machine. If you choose to use .Nm as part of a printing system run by the super-user, you should disable .Ic sprintf commands. This can be done by calling .Nm with the .Fl S flag, setting the .Ev GRAP_SAFER environment variable, or compiling with the GRAP_SAFER preprocessor symbol defined.
(The GNU configure script included with .Nm will define that preprocessor symbol if the .Fl -with-grap-safe option is given.) .Pp The .Nm commands are sketched below. Refer to Kernighan and Bentley's paper for the details. .Pp New versions of .Xr groff 1 will invoke .Nm if .Fl G is given. .Ss Commands .Pp Commands are separated from one another by newlines or semicolons (;). .Pp .Ic frame .Op Ar line_description .Oo .Bk -words .Cm ht Ar height No \(or Cm wid Ar width .Ek .Oc .Oo .Bk -words .Oo .Sm off .Cm ( top No \(or Cm bottom No \(or .Cm left No \(or .Sm on .Cm right ) .Ar line_description .Ek .Oc \&... .Oc .sp .Ic frame .Oo .Bk -words .Cm ht Ar height No \(or Cm wid Ar width .Ek .Oc .Op Ar line_description .Oo .Bk -words .Oo .Sm off .Cm ( top No \(or Cm bottom No \(or .Cm left No \(or .Sm on .Cm right ) .Ar line_description .Ek .Oc \&... .Oc .Bd -filled -offset indent This describes how the axes for the graph are drawn. A .Ar line_description is a .Xr pic line description, e.g., .Li dashed .Li 0.5 , or the literal .Li solid . It may also include a .Ic color keyword followed by the color to draw the string in double quotes. Any color understood by the underlying groff system can be used. Color can only be used under GNU pic, and is not available in compatibility mode. Similarly, for pic implementations that understand .Ic thickness , that attribute may be used with a real valued parameter. .Ic Thickness is not available in compatibility mode. .Pp If the first .Ar line_description is given, the frame is drawn with that style. The default is .Li solid . The height and width of the frame can also be specified in inches. The default line style can be over-ridden for sides of the frame by specifying additional parameters to .Ic frame . .Pp If no plotting commands have been given before the .Ic frame command is issued, the frame will be output at that point in the plotting stream relative to embedded .Xr troff or .Xr pic commands. Otherwise the frame is output before the first plotted object (even invisible ones). .Pp .Ic ht and .Ic wid are in inches by default, but can be any .Xr groff unit. If omitted, the dimensions are 2 inches high by 3 inches wide. .Ed .Pp .Ic coord .Op Ar name .Op Cm x Ar expr , expr .Op Cm y Ar expr , expr .Oo .Cm log x No \(or .Cm log y No \(or .Cm log log .Oc .Bd -filled -offset indent The .Ic coord command specifies a new coordinate system or sets limits on the default system. It defines the largest and smallest values that can be plotted, and therefore the scale of the data in the frame. The limits for the x and y coordinate systems can be given separately. If a .Ar name is given, that coordinate system is defined, if not the default system is modified. .Pp A coordinate system created by one .Ic coord command may be modified by subsequent .Ic coord commands. A .Nm program may declare a coordinate space using .Ic coord , .Ic copy a file of data through a macro that plots the data and finds its maxima and minima, and then define the size of the coordinate system with a second .Ic coord statement. .Pp This command also determines if a scale is plotted logarithmically. .Cm log log means the same thing as .Cm log x log y . .Ed .Pp .Ic draw .Op Ar line_name .Op Ar line_description .Op Ar plot_string .Bd -filled -offset indent The .Ic draw command defines the style with which a given line will be plotted. If .Ar line_name is given, the style is associated with that name, otherwise the default style is set. 
.Ar line_description is a .Xr pic line description, and the optional .Ar plot_string is a string to be centered at each point. The default line description is .Li invis , and the default plotting string is a centered bullet, so by default each point is a filled circle, and they are unconnected. If points are being connected, each .Ic draw command ends any current line and begins a new one. .Pp When defining a line style, that is the first .Ic draw command for a given line name, specifying no plot string means that there are to be no plot strings. Omitting the plot string on subsequent .Ic draw commands addressing the same named line means not to change the plot string. If a line has been defined with a plot string, and the format is changed by a subsequent .Ic draw statement, the plot string can be removed by specifying "" in the .Ic draw statement. .Pp The plot string can have its format changed through several string_modifiers. String_modifiers are described in the description of the .Ic plot command. .Pp The standard defines file includes several macros useful as plot strings, including .Ic bullet , .Ic square , and .Ic delta . .Pp .Ic new is a synonym for .Ic draw . .Ed .Pp .Ic next .Op Ar line_name .Cm at .Op Ar coordinates_name .Ar expr , expr .Op Ar line_description .Bd -filled -offset indent The .Ic next command plots the given point using the line style given by .Ar line_name , or the default if none is given. If .Ar line_name is given, it should have been defined by an earlier .Ic draw command; if not, a new line style with that name is created, initialized the same way as the default style. The two expressions give the point's x and y values, relative to the optional coordinate system. That system should have been defined by an earlier .Ic coord command; if not, grap will exit. If the optional .Ar line_description is given, it overrides the style's default line description. You cannot over-ride the plotting string. To use a different plotting string use the .Ic plot command. .Pp The coordinates may optionally be enclosed in parentheses: .Ar ( expr , expr ) .Ed .Pp .Ar quoted_string .Op Ar string_modifiers .Oo .No , Ar quoted_string .Oo .Ar string_modifiers .Oc .Oc \&... .Cm at .Op Ar coordinates_name .Ar expr , expr .Pp .Ic plot .Ar expr .Op Ar format_string .Cm at .Op Ar coordinates_name .Ar expr , expr .Bd -filled -offset indent These commands both plot a string at the given point. In the first case the literal strings are stacked above each other. The string_modifiers include the .Xr pic justification modifiers .Ns No ( Ic ljust , .Ic rjust , .Ic above , and .Ic below Ns No ), and absolute and relative .Li size modifiers. See the .Xr pic documentation for the description of the justification modifiers. .Nm also supports the .Ic aligned and .Ic unaligned modifiers which are briefly noted in the description of the .Ic label command. .Pp The standard defines file includes several macros useful as plot strings, including .Ic bullet , .Ic square , and .Ic delta . .Pp Strings placed by either format of the .Ic plot command are restricted to being within the frame. This can be overridden by using the .Ic unclipped attribute, which allows a string to be plotted in or out of the frame. The .Fl c and .Fl C flags set .Ic unclipped on all strings, and to prevent a string from being plotted outside the frame when those flags are active, the .Ic clipped attribute can be used to restore clipping behavior. 
Though .Ic clipped or .Ic unclipped can be applied to any string, it only has meaning for .Ic plot statements. .Pp .Li size .Ar expr sets the string size to .Ar expr points. If .Ar expr is preceded by a + or -, the size is increased or decreased by that many points. .Pp If .Ic color and a color name in double quotes appears, the string will be rendered in that color under a version of GNU troff that supports color. Color is not available in compatibility mode. .Pp In the second version, the .Ar expr is converted to a string and placed on the graph. .Ar format_string is a .Xr printf 3 format string. Only formatting escapes for printing floating point numbers make sense. The format string is only respected if the .Ic sprintf command is also active. See the description of .Ic sprintf for the various ways to disable it. .Ic Plot and .Ic sprintf respond differently when .Nm is running safely. .Ic Sprintf ignores any arguments, passing the format string through without substitution. .Ic plot ignores the format string completely, plotting .Ar expr using the .Qq %g format. .Pp Points are specified the same way as for .Ic next commands, with the same consequences for undefined coordinate systems. .Pp The second form of this command is because the first form can be used with a .Nm .Ic sprintf expression (See .Sx Expressions ) . .Ed .Pp .Ic ticks .Sm off .Xo ( Cm left No \(or Cm right .No \(or Cm top No \(or Cm bottom ) .Xc .Oo .Sm on .Xo ( Cm in Ns No \(or Ns Cm out ) .Xc .Op Ar expr .Oc .Sm off .Oo .Cm on \(or Cm auto .Sm on .Ar coord_name .Oc .Pp .Ic ticks .Sm off .Xo ( Cm left No \(or Cm right No \(or Cm top No \(or Cm bottom ) .Xc .Sm on .Xo ( Cm in Ns No \(or Ns Cm out ) .Xc .Op Ar expr .Oo .Cm up Ar expr No \(or .Cm down Ar expr No \(or .Cm left Ar expr No \(or .Cm right Ar expr .Oc .Cm at .Op Ar coord_name .Ar expr .Op Ar format_string .Oo .Oo .No , Ar expr .Oo .Ar format_string .Oc .Oc .No ... .Oc .Pp .Ic ticks .Sm off .Xo ( Cm left No \(or Cm right .No \(or Cm top No \(or Cm bottom ) .Xc .Sm on .Xo ( Cm in Ns No \(or Ns Cm out ) .Xc .Op Ar expr .Oo .Cm up Ar expr No \(or .Cm down Ar expr No \(or .Cm left Ar expr No \(or .Cm right Ar expr .Oc .Cm from .Op coord_name .Ar start_expr .Cm to .Ar end_expr .Oo .Cm by .Sm off .Oo .No + \(or - \(or * \(or / .Sm on .Oc .Ar by_expr .Oc .Op format_string .Pp .Ic ticks .Sm off .Oo .Cm left Xo No \(or Cm right .No \(or Cm top No \(or Cm bottom .Oc .Xc .Sm on .Cm off .Bd -filled -offset indent This command controls the placement of ticks on the frame. By default, ticks are automatically generated on the left and bottom sides of the frame. .Pp The first version of this command turns on the automatic tick generation for a given side. The .Cm in or .Cm out parameter controls the direction and length of the ticks. If a .Ar coord_name is specified, the ticks are automatically generated using that coordinate system. If no system is specified, the default coordinate system is used. As with .Ic next and .Ic plot , the coordinate system must be declared before the .Ic ticks statement that references it. This syntax for requesting automatically generated ticks is an extension, and will not port to older .Nm implementations. .Pp The second version of the .Ic ticks command overrides the automatic placement of the ticks by specifying a list of coordinates at which to place the ticks. If the ticks are not defined with respect to the default coordinate system, the .Ar coord_name parameter must be given. For each tick a .Xr printf 3 style format string can be given. 
The .Ar format_string defaults to .Qq %g . The format string can also take string modifiers as described in the .Ic plot command. To place ticks with no labels, specify .Ar format_string as .Qq \& . .Pp If .Ic sprintf is disabled, .Ic ticks behaves as .Ic plot with respect to the format string. .Pp The labels on the ticks may be shifted by specifying a direction and the distance in inches to offset the label. That is the optional direction and expression immediately preceding the .Cm at . .Pp The third format of the .Ic ticks command over-rides the default tick generation with a set of ticks at regular intervals. The syntax is reminiscent of programming language for loops. Ticks are placed starting at .Ar start_expr ending at .Ar end_expr one unit apart. If the .Cm by clause is specified, ticks are .Ar by_expr units apart. If an operator appears before .Ar by_expr each tick is operated on by that operator instead of +. For example .Bd -literal -offset indent-two ticks left out from 2 to 32 by *2 .Ed .Pp will put ticks at 2, 4, 8, 16, and 32. If .Ar format_string is specified, all ticks are formatted using it. .Pp The parameters preceding the .Cm from act as described above. .Pp The .Cm at and .Cm for forms of tick command may both be issued on the same side of a frame. For example: .Bd -literal -offset indent-two ticks left out from 2 to 32 by *2 ticks left in 3, 5, 7 .Ed .Pp will put ticks on the left side of the frame pointing out at 2, 4, 8, 16, and 32 and in at 3, 5, and 7. .Pp The final form of .Ic ticks turns off ticks on a given side. If no side is given the ticks for all sides are cancelled. .Pp .Ic tick is a synonym for .Ic ticks . .Ed .Pp .Ic grid .Sm off .Xo ( Cm left No \(or Cm right .No \(or Cm top No \(or Cm bottom ) .Xc .Sm on .Op Li ticks off .Op Ar line_description .Oo .Cm up Ar expr No \(or .Cm down Ar expr No \(or .Cm left Ar expr No \(or .Cm right Ar expr .Oc .Oo .Sm off .Cm on \(or Cm auto .Sm on .Op Ar coord_name .Oc .Pp .Ic grid .Sm off .Xo ( Cm left No \(or Cm right .No \(or Cm top No \(or Cm bottom ) .Xc .Sm on .Op Li ticks off .Op Ar line_description .Oo .Cm up Ar expr No \(or .Cm down Ar expr No \(or .Cm left Ar expr No \(or .Cm right Ar expr .Oc .Cm at .Op Ar coord_name .Ar expr .Op Ar format_string .Oo .Oo .No , Ar expr .Oo .Ar format_string .Oc .Oc .No ... .Oc .Pp .Ic grid .Sm off .Xo ( Cm left No \(or Cm right .No \(or Cm top No \(or Cm bottom ) .Xc .Sm on .Op Li ticks off .Op Ar line_description .Oo .Cm up Ar expr No \(or .Cm down Ar expr No \(or .Cm left Ar expr No \(or .Cm right Ar expr .Oc .Cm from .Op coord_name .Ar start_expr .Cm to .Ar end_expr .Oo .Cm by .Sm off .Oo .No + \(or - \(or * \(or / .Sm on .Oc .Ar by_expr .Oc .Op format_string .Bd -filled -offset indent The .Ic grid command is similar to the .Ic ticks command except that .Ic grid specifies the placement of lines in the frame. The syntax is similar to .Ic ticks as well. .Pp By specifying .Li ticks off in the command, no ticks are drawn on that side of the frame. If ticks appear on a side by default, or have been declared by an earlier .Ic ticks command, .Ic grid does not cancel them unless .Li ticks off is specified. .Pp Instead of a direction for ticks, .Ic grid allows the user to pick a line description for the grid lines. The usual .Xr pic line descriptions are allowed. .Pp Grids are labelled by default. To omit labels, specify the format string as .Qq \& . .Pp If .Ic sprintf is disabled, .Ic grid behaves as .Ic plot with respect to the format string. 
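.Pp For example, dotted horizontal grid lines every two units, with the default labels, might be requested with a command like this (the values are illustrative):
.Bd -literal -offset indent-two
grid left dotted from 0 to 10 by 2
.Ed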
.Ed .Pp .Ic label .Sm off .Xo ( Cm left No \(or Cm right .No \(or Cm top No \(or Cm bottom ) .Xc .Sm on .Ar quoted_string .Op Ar string_modifiers .Oo .No , Ar quoted_string .Oo .Ar string_modifiers .Oc .Oc \&... .Oo .Cm up Ar expr No \(or .Cm down Ar expr No \(or .Cm left Ar expr No \(or .Cm right Ar expr .Oc .Bd -filled -offset indent The .Ic label command places a label on the given axis. It is possible to specify several labels, which will be stacked over each other as in .Xr pic . The final argument, if present, specifies how many inches the label is shifted from the axis. .Pp By default the left and right labels run parallel to the frame. You can cancel this by specifying .Li unaligned as a .Ar string_modifier . .Ed .Pp .Ic circle .Cm at .Op Ar coordinate_name .Ar expr , expr .Op Cm radius Ar expr .Op Ar linedesc .Bd -filled -offset indent This draws a circle at the point indicated. By default, the circle is small, 0.025 inches. This can be over-ridden by specifying a radius. The coordinates of the point are relative to the named coordinate system, or the default system if none is specified. .Pp This command has been extended to take a line description, e.g., .Li dotted . It also accepts the filling extensions described below in the .Ic bar command. It will also accept a .Ic color keyword that gives the color of the outline of the circle in double quotes and a .Ic fillcolor command that sets the color to fill the circle with similarly. Colors are only available when compatibility mode is off, and using a version of GNU pic that supports color. .Ed .Pp .Ic line .Op Ar line_description .Cm from .Op Ar coordinate_name .Ar expr , expr .Cm to .Op Ar coordinate_name .Ar expr , expr .Op Ar line_description .Pp .Ic arrow .Op Ar line_description .Cm from .Op Ar coordinate_name .Ar expr , expr .Cm to .Op Ar coordinate_name .Ar expr , expr .Op Ar line_description .Bd -filled -offset indent This draws a line or arrow from the first point to the second using the given style. The default line style is .Li solid . The .Ar line_description can be given either before the .Cm from or after the .Cm to clause. If both are given the second is used. It is possible to specify one point in one coordinate system and one in another; note that if both points are in a named coordinate system (even if they are in the same named coordinate system), both points must have .Ar coordinate_name given. .Ed .Pp .Ic copy .Op Qq Ar filename .Op Cm until Qq Ar string .Op Cm thru Ar macro .Bd -filled -offset indent The .Ic copy command imports data from another file into the current graph. The form with only a filename given is a simple file inclusion; the included file is simply read into the input stream and can contain arbitrary .Nm commands. The more common case is that it is a number list; see .Sx Number Lists below. .Pp The second form takes lines from the file, splits them into words delimited by one or more spaces, and calls the given macro with those words as parameters. The macro may either be defined here, or be a macro defined earlier. See .Sx Macros for more information on macros. .Pp The .Ar filename may be omitted if the .Cm until clause is present. If so the current file is treated as the input file until .Ar string is encountered at the beginning of the line. .Pp .Ic copy is one of the workhorses of .Nm grap . Check out the paper and .Pa /usr/local/share/examples/grap for more details. Confirm the location of the examples directory using the .Fl v flag. 
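.Pp For example, a two-column data file (hypothetically named
.Pa ./data
here) might be plotted through a simple one-point-per-line macro:
.Bd -literal -offset indent-two
define plotpair { next at $1, $2 }
copy "./data" thru plotpair
.Ed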
.Ed .Ic print .Sm off .Ar ( expr \(or string ) .Sm on .Bd -filled -offset indent Prints its argument to the standard error. .Ed .Pp .Ic sh Ar block .Bd -filled -offset indent This passes .Ar block to .Xr sh 1 . Unlike K&B .Nm no macro or variable expansion is done. I believe that this is also true for GNU .Xr pic version 1.10. See the .Sx Macros section for information on defining blocks. .Ed .Pp .Ic pic Ar pic_statement .Bd -filled -offset indent This issues the given .Xr pic statements in the enclosing .Ic .PS and .Ic .PE at the point where the command is issued. .Pp Statements that begin with a period are considered to be .Xr troff statements and are output in the enclosing .Ic .PS and .Ic .PE at the point where the command appears. .Pp For the purposes of relative placement of .Xr pic or .Xr troff commands, the frame is output immediately before the first plotted object, or the .Ic frame statement, if any. If the user specifies .Xr pic or .Xr troff commands and neither any plotable object nor a .Ic frame command, the commands will not be output. .Ed .Pp .Ic graph Ar Name pic_commands .Bd -filled -offset indent This command is used to position graphs with respect to each other. The current graph is given the .Xr pic name .Ar Name (names used by .Xr pic begin with capital letters). Any .Xr pic commands following the graph are used to position the next graph. The frame of the graph is available for use with .Xr pic name .Li Frame. The following places a second graph below the first: .Bd -literal -offset indent-two graph Linear [ graph description ] graph Exponential with .Frame.n at \\ Linear.Frame.s - (0, .05) [ graph description ] .Ed .Ed .Pp .Ar name = expr .Bd -filled -offset indent This assigns .Ar expr to the variable .Ar name . .Nm has only numeric (double) variables. .Pp Assignment creates a variable if it does not exist. Variables persist across graphs. Assignments can cascade; .Li a = b = 35 assigns 35 to .Li a and .Li b . .Ed .Pp .Ic bar .Sm off .No ( Cm up No \(or Cm right ) .Sm on .Op Ar coordinates_name .Ar offset .Cm ht .Ar height .Op Cm wid Ar width .Op Cm base Ar base_offset .Op Ar line_description .Pp .Ic bar .Op Ar coordinates_name .Ar expr , expr , .Op Ar coordinates_name .Ar expr , expr , .Op Ar line_description .Bd -filled -offset indent The .Ic bar command facilitates drawing bar graphs. The first form of the command describes the bar somewhat generally and has .Nm place it. The bar may extend up or to the right, is centered on .Ar offset and extends up or right .Ar height units (in the given coordinate system). For example .Bd -literal -offset indent-two bar up 3 ht 2 .Ed .Pp draws a 2 unit high bar sitting on the x axis, centered on x=3. By default bars are 1 unit wide, but this can be changed with the .Ic wid keyword. By default bars sit on the base axis, i.e., bars directed up will extend from y=0. That may be overridden by the .Ic base keyword. (The bar described above has corners (2.5, 0) and (3.5, 2).) .Pp The line description has been extended to include a .Ic fill Ar expr keyword that specifies the shading inside the bar. Bars may be drawn in any line style. They support the .Ic color and .Ic fillcolor keywords described under .Ic circle . .Pp The second form of the command draws a box with the two points as corners. This can be used to draw boxes highlighting certain data as well as bar graphs. Note that filled bars will cover data drawn under them. 
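.Pp For example, a half-unit-wide bar shaded at half density might be drawn with (the values are illustrative):
.Bd -literal -offset indent-two
bar up 3 ht 2 wid 0.5 fill 0.5
.Ed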
.Ed .Ss Control Flow .Pp .Ic if Ar expr Ic then Ar block .Op Ic else Ar block .Bd -filled -offset indent The .Ic if statement provides simple conditional execution. If .Ar expr is non-zero, the .Ar block after the .Ic then statement is executed. If not, the .Ar block after the .Ic else is executed, if present. See .Sx Macros for the definition of blocks. Early versions of this implementation of .Nm treated the blocks as macros that were defined and expanded in place. This led to unnecessary confusion because explicit separators were sometimes called for. Now, .Nm inserts a separator (;) after the last character in .Ar block , so constructs like .Bd -literal if (x == 3) { y = y + 1 } x = x + 1 .Ed behave as expected. A separator is also appended to the end of a .Ic for block. .Ed .Pp .Ic for Ar name Ic from Ar from_expr Ic to Ar to_expr .Oo .Ic by .Op No +\(or-\(or*\(or/ .Ar by_expr .Oc .Ic do .Ar block .Bd -filled -offset indent This command executes .Ar block iteratively. The variable .Ar name is set to .Ar from_expr and incremented by .Ar by_expr until it exceeds .Ar to_expr . The iteration has the semantics defined in the .Ic ticks command. The definition of .Ar block is discussed in .Sx Macros . See also the note about implicit separators in the description of the .Ic if command. .Pp An .Ic = can be used in place of .Ic from . .Ed .Ss Expressions .Pp .Nm supports most standard arithmetic operators: + - / * ^. The caret (^) is exponentiation. In an .Ic if statement .Nm also supports the C logical operators ==, !=, &&, || and unary !. Also in an .Ic if , == and != are overloaded for the comparison of quoted strings. Parentheses are used for grouping. .Pp Assignment is not allowed in an expression in any context, except for simple cascading of assignments. .Li a = b = 35 works as expected; .Li a = 3.5 * (b = 10) does not execute. .Pp .Nm supports the following functions that take one argument: .Ic log , exp , int , sin , cos , sqrt , rand . The logarithms are base 10 and the trigonometric functions are in radians. .Ic eexp returns Euler's number to the given power and .Ic ln returns the natural logarithm. The natural log and exponentiation functions are extensions and are probably not available in other .Nm implementations. .Pp .Ic rand returns a random number uniformly distributed on [0,1). The following two-argument functions are supported: .Ic atan2 , min , max . .Ic atan2 works just like .Xr atan2 3 . The random number generator can be seeded by calling .Ic srand with a single parameter (converted internally to an integer). Because its return value is of no use, you must use .Ic srand as a separate statement; it is not part of a valid expression. .Ic srand is not portable. .Pp The .Ic getpid function takes no arguments and returns the process id. This may be used to seed the random number generator, but do not expect cryptographically random values to result. .Pp Other than string comparison, no expressions can use strings. One string valued function exists: .Ic sprintf ( Ar format , .Oo .Ar expr .Op Ar \&, expr .Oc ). It operates like .Xr sprintf 3 , except returning the value. It can be used anywhere a quoted string is used. If .Nm is run with .Fl S , the environment variable .Ev GRAP_SAFER is defined, or .Nm has been compiled for safer operation, the .Ic sprintf command will return the format string. This mode of operation is intended to be used only if .Nm is being used as part of a super-user enabled print system. .Ss Macros .Nm has a simple but powerful macro facility. 
Macros are defined using the .Ic define command : .Pp .Ic define Ar name block .br .Ic undefine Ar name .Bd -filled -offset indent Every occurrence of .Ar name in the program text is replaced by the contents of .Ar block . .Ar block is defined by a series of statements in nested { }'s, or a series of statements surrounded by the same letter. An example of the latter is .Bd -literal -offset indent-two define foo X coord x 1,3 X .Ed Each time .Li foo appears in the text, it will be replaced by .Li coord x 1,3 . Macros are literal, and can contain newlines. If a macro does not span multiple lines, it should end in a semicolon to avoid parsing errors. .Pp Macros can take parameters, too. If a macro call is followed by a parenthesized, comma-separated list the values starting with $1 will be replaced in the macro with the elements of the list. A $ not followed by a digit is left unchanged. This parsing is very rudimentary; no nesting or parentheses or escaping of commas is allowed. Also, there is no way to say argument 1 followed by a digit (${1}0 in sh(1)). .Pp The following will draw a line with slope 1. .Bd -literal -offset indent-two define foo { next at $1, $2 } for i from 1 to 5 { foo(i,i) } .Ed Macros persist across graphs. The file .Pa /usr/local/share/grap/grap.defines contains simple macros for plotting common characters. The .Ic undefine command deletes a macro. .Pp See the directory .Pa /usr/local/share/examples/grap for more examples of macros. Confirm the location of the examples directory using the .Fl v flag. .Ed .Ss Number Lists .Pp A whitespace-separated list of numbers is treated specially. The list is taken to be points to be plotted using the default line style on the default coordinate system. If more than two numbers are given, the extra numbers are taken to be additional y values to plot at the first x value. Number lists in DWB .Nm can be comma-separated, and this .Nm supports that as well. More precisely, numbers in number lists can be separated by either whitespace, commas, or both. .Bd -literal -offset indent 1 2 3 4 5 6 .Ed .sp Will plot points using the default line style at (1,2), (1,3),(4,5) and (4,6). A simple way to plot a set of numbers in a file named .Pa ./data is: .Bd -literal -offset indent \&.G1 copy "./data" \&.G2 .Ed .Ss Pic Macros .Pp .Nm defines pic macros that can be used in embedded pic code to place elements in the graph. The macros are .Ic x_gg , .Ic y_gg , and .Ic xy_gg . These macros define pic distances that correspond to the given argument. They can be used to size boxes or to plot pic constructs on the graph. To place a given construct on the graph, you should add Frame.Origin to it. Other coordinate spaces can be used by replacing .Ic gg with the name of the coordinate space. A coordinate space named .Ic gg cannot be reliably accessed by these macros. .Pp The macros are emitted immediately before the frame is drawn. .Pp DWB .Nm may use these as part of its implementation. This .Nm provides them only for compatibility. Note that these are very simple macros, and may not do what you expect under complex conditions. .Sh ENVIRONMENT VARIABLES .Pp If the environment variable .Ev GRAP_DEFINES is defined, .Nm will look for its defines file there. If that value is a relative path name the path specified in the .Fl M option will be searched for it. .Ev GRAP_DEFINES overrides the compiled in location of the defines file, but may be overridden by the .Fl d or .Fl D flags. 
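.Pp For example, an alternate defines file might be selected for a single run with a pipeline like this (the file names are illustrative):
.Bd -literal -offset indent
GRAP_DEFINES=$HOME/lib/my.defines grap graph.g | pic | troff -ms
.Ed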
.Pp If .Ev GRAP_SAFER is set, .Ic sprintf is disabled to prevent forcing .Nm to core dump or smash the stack. .Sh FILES .Pa /usr/local/share/grap/grap.defines .Sh SEE ALSO .Xr atan2 3 , .Xr groff 1 , .Xr pic 1 , .Xr printf 3 , .Xr sh 1 , .Xr sprintf 3 , .Xr troff 1 .Pp If documentation and examples have been installed, .Nm .Fl -version or .Nm .Fl -help will display the locations. .Sh BUGS .Pp There are several small incompatibilities with K&R .Nm grap . They include the .Ic sh command not expanding variables and macros, and a more strict adherence to parameter order in the internal commands. .Pp Although much improved, the error reporting code can still be confused. Notably, an error in a macro is not detected until the macro is used, and it produces unusual output in the error message. .Pp Iterating many times over a macro with no newlines can run .Nm out of memory. .Sh AUTHOR This implementation was done by .An Ted Faber Ao faber@lunabase.org Ac Ns . .An Bruce Lilly Ao blilly@erols.com Ac contributed many bug fixes, including a considerable revamp of the error reporting code. If you can actually find an error in your .Nm code, you can probably thank him. .Nm was designed and specified by .An Brian Kernighan and .An Jon Bentley . doclifter-2.11/tests/docliftertest1.chk0000664000175000017500000000410412152465736016355 0ustar esresr <?xml version="1.0" encoding="ISO-8859-1" standalone="no"?> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> <svg version="1.1" baseProfile="full" id="body" width="8in" height="8in" viewBox="0 0 1 1" preserveAspectRatio="none" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ev="http://www.w3.org/2001/xml-events"> <title>SVG drawing</title> <desc>This was produced by version 4.4 of GNU libplot, a free library for exporting 2-D vector graphics.</desc> <rect id="background" x="0" y="0" width="1" height="1" stroke="none" fill="white"/> <g id="content" transform="translate(0.45312,0.5) scale(1,-1) scale(0.125) " xml:space="preserve" stroke="black" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10.433" stroke-dasharray="none" stroke-dashoffset="0" stroke-opacity="1" fill="none" fill-rule="evenodd" fill-opacity="1" font-style="normal" font-variant="normal" font-weight="normal" font-stretch="normal" font-size-adjust="none" letter-spacing="normal" word-spacing="normal" text-anchor="start"> <rect x="0" y="-0.25" width="0.75" height="0.5" stroke-width="0.0094118" /> <text transform="translate(0.26306,-0.049028) scale(1,-1) scale(0.0069444) " font-family="Helvetica,sans-serif" font-size="20px" stroke="none" fill="black" >box</text> </g> </svg> Description This empty page was brought to you by brain damage somewhere in POD, the Perl build system, or the Perl maintainers' release procedures. doclifter-2.11/tests/stringwidth.man0000664000175000017500000000055712152465736016005 0ustar esresr.TH PRINTF 3 2011-09-28 "GNU" "Linux Programmer's Manual" .SH NAME printf, fprintf, sprintf, snprintf, vprintf, vfprintf, vsprintf, vsnprintf \- formatted output conversion .SH DESCRIPTION This is a stripped manpage intended to test a common evaluation case of the \\w conditiol in groff. 
.SH EXAMPLE .if \w'\*(Pi'=0 .ds Pi pi To print \*(Pi to five decimal places: doclifter-2.11/tests/capabilities.man0000664000175000017500000006651112152465736016072 0ustar esresr.\" Copyright (c) 2002 by Michael Kerrisk .\" .\" Permission is granted to make and distribute verbatim copies of this .\" manual provided the copyright notice and this permission notice are .\" preserved on all copies. .\" .\" Permission is granted to copy and distribute modified versions of this .\" manual under the conditions for verbatim copying, provided that the .\" entire resulting derived work is distributed under the terms of a .\" permission notice identical to this one. .\" .\" Since the Linux kernel and libraries are constantly changing, this .\" manual page may be incorrect or out-of-date. The author(s) assume no .\" responsibility for errors or omissions, or for damages resulting from .\" the use of the information contained herein. The author(s) may not .\" have taken the same level of care in the production of this manual, .\" which is licensed free of charge, as they might when working .\" professionally. .\" .\" Formatted or processed versions of this manual, if unaccompanied by .\" the source, must acknowledge the copyright and authors of this work. .\" .\" 6 Aug 2002 - Initial Creation .\" Modified 2003-05-23, Michael Kerrisk, .\" Modified 2004-05-27, Michael Kerrisk, .\" 2004-12-08, mtk Added O_NOATIME for CAP_FOWNER .\" 2005-08-16, mtk, Added CAP_AUDIT_CONTROL and CAP_AUDIT_WRITE .\" 2008-07-15, Serge Hallyn .\" Document file capabilities, per-process capability .\" bounding set, changed semantics for CAP_SETPCAP, .\" and other changes in 2.6.2[45]. .\" Add CAP_MAC_ADMIN, CAP_MAC_OVERRIDE, CAP_SETFCAP. .\" 2008-07-15, mtk .\" Add text describing circumstances in which CAP_SETPCAP .\" (theoretically) permits a thread to change the .\" capability sets of another thread. .\" Add section describing rules for programmatically .\" adjusting thread capability sets. .\" Describe rationale for capability bounding set. .\" Document "securebits" flags. .\" Add text noting that if we set the effective flag for one file .\" capability, then we must also set the effective flag for all .\" other capabilities where the permitted or inheritable bit is set. .\" 2011-09-07, mtk/Serge hallyn: Add CAP_SYSLOG .\" FIXME: Linux 3.0 added CAP_WAKE_ALARM .\" .TH CAPABILITIES 7 2011-10-04 "Linux" "Linux Programmer's Manual" .SH NAME capabilities \- overview of Linux capabilities .SH DESCRIPTION For the purpose of performing permission checks, traditional UNIX implementations distinguish two categories of processes: .I privileged processes (whose effective user ID is 0, referred to as superuser or root), and .I unprivileged processes (whose effective UID is nonzero). Privileged processes bypass all kernel permission checks, while unprivileged processes are subject to full permission checking based on the process's credentials (usually: effective UID, effective GID, and supplementary group list). Starting with kernel 2.2, Linux divides the privileges traditionally associated with superuser into distinct units, known as .IR capabilities , which can be independently enabled and disabled. Capabilities are a per-thread attribute. 
.\" .SS Capabilities List The following list shows the capabilities implemented on Linux, and the operations or behaviors that each capability permits: .TP .BR CAP_AUDIT_CONTROL " (since Linux 2.6.11)" Enable and disable kernel auditing; change auditing filter rules; retrieve auditing status and filtering rules. .TP .BR CAP_AUDIT_WRITE " (since Linux 2.6.11)" Write records to kernel auditing log. .TP .B CAP_CHOWN Make arbitrary changes to file UIDs and GIDs (see .BR chown (2)). .TP .B CAP_DAC_OVERRIDE Bypass file read, write, and execute permission checks. (DAC is an abbreviation of "discretionary access control".) .TP .B CAP_DAC_READ_SEARCH Bypass file read permission checks and directory read and execute permission checks. .TP .B CAP_FOWNER .PD 0 .RS .IP * 2 Bypass permission checks on operations that normally require the file system UID of the process to match the UID of the file (e.g., .BR chmod (2), .BR utime (2)), excluding those operations covered by .B CAP_DAC_OVERRIDE and .BR CAP_DAC_READ_SEARCH ; .IP * set extended file attributes (see .BR chattr (1)) on arbitrary files; .IP * set Access Control Lists (ACLs) on arbitrary files; .IP * ignore directory sticky bit on file deletion; .IP * specify .B O_NOATIME for arbitrary files in .BR open (2) and .BR fcntl (2). .RE .PD .TP .B CAP_FSETID Don't clear set-user-ID and set-group-ID permission bits when a file is modified; set the set-group-ID bit for a file whose GID does not match the file system or any of the supplementary GIDs of the calling process. .TP .B CAP_IPC_LOCK Lock memory .RB ( mlock (2), .BR mlockall (2), .BR mmap (2), .BR shmctl (2)). .TP .B CAP_IPC_OWNER Bypass permission checks for operations on System V IPC objects. .TP .B CAP_KILL Bypass permission checks for sending signals (see .BR kill (2)). This includes use of the .BR ioctl (2) .B KDSIGACCEPT operation. .\" FIXME CAP_KILL also has an effect for threads + setting child .\" termination signal to other than SIGCHLD: without this .\" capability, the termination signal reverts to SIGCHLD .\" if the child does an exec(). What is the rationale .\" for this? .TP .BR CAP_LEASE " (since Linux 2.4)" Establish leases on arbitrary files (see .BR fcntl (2)). .TP .B CAP_LINUX_IMMUTABLE Set the .B FS_APPEND_FL and .B FS_IMMUTABLE_FL .\" These attributes are now available on ext2, ext3, Reiserfs, XFS, JFS i-node flags (see .BR chattr (1)). .TP .BR CAP_MAC_ADMIN " (since Linux 2.6.25)" Override Mandatory Access Control (MAC). Implemented for the Smack Linux Security Module (LSM). .TP .BR CAP_MAC_OVERRIDE " (since Linux 2.6.25)" Allow MAC configuration or state changes. Implemented for the Smack LSM. .TP .BR CAP_MKNOD " (since Linux 2.4)" Create special files using .BR mknod (2). .TP .B CAP_NET_ADMIN Perform various network-related operations (e.g., setting privileged socket options, enabling multicasting, interface configuration, modifying routing tables). .TP .B CAP_NET_BIND_SERVICE Bind a socket to Internet domain privileged ports (port numbers less than 1024). .TP .B CAP_NET_BROADCAST (Unused) Make socket broadcasts, and listen to multicasts. .TP .B CAP_NET_RAW Use RAW and PACKET sockets. .\" Also various IP options and setsockopt(SO_BINDTODEVICE) .TP .B CAP_SETGID Make arbitrary manipulations of process GIDs and supplementary GID list; forge GID when passing socket credentials via UNIX domain sockets. .TP .BR CAP_SETFCAP " (since Linux 2.6.24)" Set file capabilities. 
.TP .B CAP_SETPCAP If file capabilities are not supported: grant or remove any capability in the caller's permitted capability set to or from any other process. (This property of .B CAP_SETPCAP is not available when the kernel is configured to support file capabilities, since .B CAP_SETPCAP has entirely different semantics for such kernels.) If file capabilities are supported: add any capability from the calling thread's bounding set to its inheritable set; drop capabilities from the bounding set (via .BR prctl (2) .BR PR_CAPBSET_DROP ); make changes to the .I securebits flags. .TP .B CAP_SETUID Make arbitrary manipulations of process UIDs .RB ( setuid (2), .BR setreuid (2), .BR setresuid (2), .BR setfsuid (2)); make forged UID when passing socket credentials via UNIX domain sockets. .\" FIXME CAP_SETUID also an effect in exec(); document this. .TP .B CAP_SYS_ADMIN .PD 0 .RS .IP * 2 Perform a range of system administration operations including: .BR quotactl (2), .BR mount (2), .BR umount (2), .BR swapon (2), .BR swapoff (2), .BR sethostname (2), and .BR setdomainname (2); .IP * perform privileged .BR syslog (2) operations (since Linux 2.6.37, .BR CAP_SYSLOG should be used to permit such operations); .IP * perform .B IPC_SET and .B IPC_RMID operations on arbitrary System V IPC objects; .IP * perform operations on .I trusted and .I security Extended Attributes (see .BR attr (5)); .IP * use .BR lookup_dcookie (2); .IP * use .BR ioprio_set (2) to assign .B IOPRIO_CLASS_RT and (before Linux 2.6.25) .B IOPRIO_CLASS_IDLE I/O scheduling classes; .IP * forge UID when passing socket credentials; .IP * exceed .IR /proc/sys/fs/file-max , the system-wide limit on the number of open files, in system calls that open files (e.g., .BR accept (2), .BR execve (2), .BR open (2), .BR pipe (2)); .IP * employ .B CLONE_NEWNS flag with .BR clone (2) and .BR unshare (2); .IP * call .BR setns (2); .IP * perform .B KEYCTL_CHOWN and .B KEYCTL_SETPERM .BR keyctl (2) operations; .IP * perform .BR madvise (2) .B MADV_HWPOISON operation. .RE .PD .TP .B CAP_SYS_BOOT Use .BR reboot (2) and .BR kexec_load (2). .TP .B CAP_SYS_CHROOT Use .BR chroot (2). .TP .B CAP_SYS_MODULE Load and unload kernel modules (see .BR init_module (2) and .BR delete_module (2)); in kernels before 2.6.25: drop capabilities from the system-wide capability bounding set. .TP .B CAP_SYS_NICE .PD 0 .RS .IP * 2 Raise process nice value .RB ( nice (2), .BR setpriority (2)) and change the nice value for arbitrary processes; .IP * set real-time scheduling policies for calling process, and set scheduling policies and priorities for arbitrary processes .RB ( sched_setscheduler (2), .BR sched_setparam (2)); .IP * set CPU affinity for arbitrary processes .RB ( sched_setaffinity (2)); .IP * set I/O scheduling class and priority for arbitrary processes .RB ( ioprio_set (2)); .IP * apply .BR migrate_pages (2) to arbitrary processes and allow processes to be migrated to arbitrary nodes; .\" FIXME CAP_SYS_NICE also has the following effect for .\" migrate_pages(2): .\" do_migrate_pages(mm, &old, &new, .\" capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); .IP * apply .BR move_pages (2) to arbitrary processes; .IP * use the .B MPOL_MF_MOVE_ALL flag with .BR mbind (2) and .BR move_pages (2). .RE .PD .TP .B CAP_SYS_PACCT Use .BR acct (2). .TP .B CAP_SYS_PTRACE Trace arbitrary processes using .BR ptrace (2); apply .BR get_robust_list (2) to arbitrary processes. 
.TP .B CAP_SYS_RAWIO Perform I/O port operations .RB ( iopl (2) and .BR ioperm (2)); access .IR /proc/kcore . .TP .B CAP_SYS_RESOURCE .PD 0 .RS .IP * 2 Use reserved space on ext2 file systems; .IP * make .BR ioctl (2) calls controlling ext3 journaling; .IP * override disk quota limits; .IP * increase resource limits (see .BR setrlimit (2)); .IP * override .B RLIMIT_NPROC resource limit; .IP * raise .I msg_qbytes limit for a System V message queue above the limit in .I /proc/sys/kernel/msgmnb (see .BR msgop (2) and .BR msgctl (2)). .IP * use .BR F_SETPIPE_SZ to increase the capacity of a pipe above the limit specified by .IR /proc/sys/fs/pipe-max-size . .RE .PD .TP .B CAP_SYS_TIME Set system clock .RB ( settimeofday (2), .BR stime (2), .BR adjtimex (2)); set real-time (hardware) clock. .TP .B CAP_SYS_TTY_CONFIG Use .BR vhangup (2). .TP .BR CAP_SYSLOG " (since Linux 2.6.37)" Perform privileged .BR syslog (2) operations. See .BR syslog (2) for information on which operations require privilege. .\" .SS Past and Current Implementation A full implementation of capabilities requires that: .IP 1. 3 For all privileged operations, the kernel must check whether the thread has the required capability in its effective set. .IP 2. The kernel must provide system calls allowing a thread's capability sets to be changed and retrieved. .IP 3. The file system must support attaching capabilities to an executable file, so that a process gains those capabilities when the file is executed. .PP Before kernel 2.6.24, only the first two of these requirements are met; since kernel 2.6.24, all three requirements are met. .\" .SS Thread Capability Sets Each thread has three capability sets containing zero or more of the above capabilities: .TP .IR Permitted : This is a limiting superset for the effective capabilities that the thread may assume. It is also a limiting superset for the capabilities that may be added to the inheritable set by a thread that does not have the .B CAP_SETPCAP capability in its effective set. If a thread drops a capability from its permitted set, it can never reacquire that capability (unless it .BR execve (2)s either a set-user-ID-root program, or a program whose associated file capabilities grant that capability). .TP .IR Inheritable : This is a set of capabilities preserved across an .BR execve (2). It provides a mechanism for a process to assign capabilities to the permitted set of the new program during an .BR execve (2). .TP .IR Effective : This is the set of capabilities used by the kernel to perform permission checks for the thread. .PP A child created via .BR fork (2) inherits copies of its parent's capability sets. See below for a discussion of the treatment of capabilities during .BR execve (2). .PP Using .BR capset (2), a thread may manipulate its own capability sets (see below). .\" .SS File Capabilities Since kernel 2.6.24, the kernel supports associating capability sets with an executable file using .BR setcap (8). The file capability sets are stored in an extended attribute (see .BR setxattr (2)) named .IR "security.capability" . Writing to this extended attribute requires the .BR CAP_SETFCAP capability. The file capability sets, in conjunction with the capability sets of the thread, determine the capabilities of a thread after an .BR execve (2). The three file capability sets are: .TP .IR Permitted " (formerly known as " forced ): These capabilities are automatically permitted to the thread, regardless of the thread's inheritable capabilities. 
.TP .IR Inheritable " (formerly known as " allowed ): This set is ANDed with the thread's inheritable set to determine which inheritable capabilities are enabled in the permitted set of the thread after the .BR execve (2). .TP .IR Effective : This is not a set, but rather just a single bit. If this bit is set, then during an .BR execve (2) all of the new permitted capabilities for the thread are also raised in the effective set. If this bit is not set, then after an .BR execve (2), none of the new permitted capabilities is in the new effective set. Enabling the file effective capability bit implies that any file permitted or inheritable capability that causes a thread to acquire the corresponding permitted capability during an .BR execve (2) (see the transformation rules described below) will also acquire that capability in its effective set. Therefore, when assigning capabilities to a file .RB ( setcap (8), .BR cap_set_file (3), .BR cap_set_fd (3)), if we specify the effective flag as being enabled for any capability, then the effective flag must also be specified as enabled for all other capabilities for which the corresponding permitted or inheritable flags is enabled. .\" .SS Transformation of Capabilities During execve() .PP During an .BR execve (2), the kernel calculates the new capabilities of the process using the following algorithm: .RS .nf P'(permitted) = (P(inheritable) & F(inheritable)) | (F(permitted) & cap_bset) P'(effective) = F(effective) ? P'(permitted) : 0 P'(inheritable) = P(inheritable) [i.e., unchanged] .fi .RE where: .RS 4 .IP P 10 denotes the value of a thread capability set before the .BR execve (2) .IP P' denotes the value of a capability set after the .BR execve (2) .IP F denotes a file capability set .IP cap_bset is the value of the capability bounding set (described below). .RE .\" .SS Capabilities and execution of programs by root In order to provide an all-powerful .I root using capability sets, during an .BR execve (2): .IP 1. 3 If a set-user-ID-root program is being executed, or the real user ID of the process is 0 (root) then the file inheritable and permitted sets are defined to be all ones (i.e., all capabilities enabled). .IP 2. If a set-user-ID-root program is being executed, then the file effective bit is defined to be one (enabled). .PP The upshot of the above rules, combined with the capabilities transformations described above, is that when a process .BR execve (2)s a set-user-ID-root program, or when a process with an effective UID of 0 .BR execve (2)s a program, it gains all capabilities in its permitted and effective capability sets, except those masked out by the capability bounding set. .\" If a process with real UID 0, and nonzero effective UID does an .\" exec(), then it gets all capabilities in its .\" permitted set, and no effective capabilities This provides semantics that are the same as those provided by traditional UNIX systems. .SS Capability bounding set The capability bounding set is a security mechanism that can be used to limit the capabilities that can be gained during an .BR execve (2). The bounding set is used in the following ways: .IP * 2 During an .BR execve (2), the capability bounding set is ANDed with the file permitted capability set, and the result of this operation is assigned to the thread's permitted capability set. The capability bounding set thus places a limit on the permitted capabilities that may be granted by an executable file. 
.IP * (Since Linux 2.6.25) The capability bounding set acts as a limiting superset for the capabilities that a thread can add to its inheritable set using .BR capset (2). This means that if a capability is not in the bounding set, then a thread can't add this capability to its inheritable set, even if it was in its permitted capabilities, and thereby cannot have this capability preserved in its permitted set when it .BR execve (2)s a file that has the capability in its inheritable set. .PP Note that the bounding set masks the file permitted capabilities, but not the inherited capabilities. If a thread maintains a capability in its inherited set that is not in its bounding set, then it can still gain that capability in its permitted set by executing a file that has the capability in its inherited set. .PP Depending on the kernel version, the capability bounding set is either a system-wide attribute, or a per-process attribute. .PP .B "Capability bounding set prior to Linux 2.6.25" .PP In kernels before 2.6.25, the capability bounding set is a system-wide attribute that affects all threads on the system. The bounding set is accessible via the file .IR /proc/sys/kernel/cap-bound . (Confusingly, this bit mask parameter is expressed as a signed decimal number in .IR /proc/sys/kernel/cap-bound .) Only the .B init process may set capabilities in the capability bounding set; other than that, the superuser (more precisely: programs with the .B CAP_SYS_MODULE capability) may only clear capabilities from this set. On a standard system the capability bounding set always masks out the .B CAP_SETPCAP capability. To remove this restriction (dangerous!), modify the definition of .B CAP_INIT_EFF_SET in .I include/linux/capability.h and rebuild the kernel. The system-wide capability bounding set feature was added to Linux starting with kernel version 2.2.11. .\" .PP .B "Capability bounding set from Linux 2.6.25 onward" .PP From Linux 2.6.25, the .I "capability bounding set" is a per-thread attribute. (There is no longer a system-wide capability bounding set.) The bounding set is inherited at .BR fork (2) from the thread's parent, and is preserved across an .BR execve (2). A thread may remove capabilities from its capability bounding set using the .BR prctl (2) .B PR_CAPBSET_DROP operation, provided it has the .B CAP_SETPCAP capability. Once a capability has been dropped from the bounding set, it cannot be restored to that set. A thread can determine if a capability is in its bounding set using the .BR prctl (2) .B PR_CAPBSET_READ operation. Removing capabilities from the bounding set is only supported if file capabilities are compiled into the kernel. In kernels before Linux 2.6.33, file capabilities were an optional feature configurable via the CONFIG_SECURITY_FILE_CAPABILITIES option. Since Linux 2.6.33, the configuration option has been removed and file capabilities are always part of the kernel. When file capabilities are compiled into the kernel, the .B init process (the ancestor of all processes) begins with a full bounding set. If file capabilities are not compiled into the kernel, then .B init begins with a full bounding set minus .BR CAP_SETPCAP , because this capability has a different meaning when there are no file capabilities. Removing a capability from the bounding set does not remove it from the thread's inherited set. However it does prevent the capability from being added back into the thread's inherited set in the future. 
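.PP
For example, a thread that holds the
.B CAP_SETPCAP
capability might test for and then irreversibly drop a capability from its bounding set as follows (a minimal sketch; most error handling omitted):
.RS
.nf
#include <stdio.h>
#include <sys/prctl.h>
#include <linux/capability.h>

/* prctl(PR_CAPBSET_READ, cap) returns 1 if cap is in the bounding set */
if (prctl(PR_CAPBSET_READ, CAP_NET_RAW, 0, 0, 0) == 1) {
    /* Drop it; a capability dropped from the bounding set
       cannot be restored */
    if (prctl(PR_CAPBSET_DROP, CAP_NET_RAW, 0, 0, 0) == -1)
        perror("PR_CAPBSET_DROP");
}
.fi
.RE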
.\" .\" .SS Effect of User ID Changes on Capabilities To preserve the traditional semantics for transitions between 0 and nonzero user IDs, the kernel makes the following changes to a thread's capability sets on changes to the thread's real, effective, saved set, and file system user IDs (using .BR setuid (2), .BR setresuid (2), or similar): .IP 1. 3 If one or more of the real, effective or saved set user IDs was previously 0, and as a result of the UID changes all of these IDs have a nonzero value, then all capabilities are cleared from the permitted and effective capability sets. .IP 2. If the effective user ID is changed from 0 to nonzero, then all capabilities are cleared from the effective set. .IP 3. If the effective user ID is changed from nonzero to 0, then the permitted set is copied to the effective set. .IP 4. If the file system user ID is changed from 0 to nonzero (see .BR setfsuid (2)) then the following capabilities are cleared from the effective set: .BR CAP_CHOWN , .BR CAP_DAC_OVERRIDE , .BR CAP_DAC_READ_SEARCH , .BR CAP_FOWNER , .BR CAP_FSETID , .B CAP_LINUX_IMMUTABLE (since Linux 2.2.30), .BR CAP_MAC_OVERRIDE , and .B CAP_MKNOD (since Linux 2.2.30). If the file system UID is changed from nonzero to 0, then any of these capabilities that are enabled in the permitted set are enabled in the effective set. .PP If a thread that has a 0 value for one or more of its user IDs wants to prevent its permitted capability set being cleared when it resets all of its user IDs to nonzero values, it can do so using the .BR prctl (2) .B PR_SET_KEEPCAPS operation. .\" .SS Programmatically adjusting capability sets A thread can retrieve and change its capability sets using the .BR capget (2) and .BR capset (2) system calls. However, the use of .BR cap_get_proc (3) and .BR cap_set_proc (3), both provided in the .I libcap package, is preferred for this purpose. The following rules govern changes to the thread capability sets: .IP 1. 3 If the caller does not have the .B CAP_SETPCAP capability, the new inheritable set must be a subset of the combination of the existing inheritable and permitted sets. .IP 2. (Since kernel 2.6.25) The new inheritable set must be a subset of the combination of the existing inheritable set and the capability bounding set. .IP 3. The new permitted set must be a subset of the existing permitted set (i.e., it is not possible to acquire permitted capabilities that the thread does not currently have). .IP 4. The new effective set must be a subset of the new permitted set. .SS The """securebits"" flags: establishing a capabilities-only environment .\" For some background: .\" see http://lwn.net/Articles/280279/ and .\" http://article.gmane.org/gmane.linux.kernel.lsm/5476/ Starting with kernel 2.6.26, and with a kernel in which file capabilities are enabled, Linux implements a set of per-thread .I securebits flags that can be used to disable special handling of capabilities for UID 0 .RI ( root ). These flags are as follows: .TP .B SECBIT_KEEP_CAPS Setting this flag allows a thread that has one or more 0 UIDs to retain its capabilities when it switches all of its UIDs to a nonzero value. If this flag is not set, then such a UID switch causes the thread to lose all capabilities. This flag is always cleared on an .BR execve (2). (This flag provides the same functionality as the older .BR prctl (2) .B PR_SET_KEEPCAPS operation.) 
.TP .B SECBIT_NO_SETUID_FIXUP Setting this flag stops the kernel from adjusting capability sets when the thread's effective and file system UIDs are switched between zero and nonzero values. (See the subsection .IR "Effect of User ID Changes on Capabilities" .) .TP .B SECBIT_NOROOT If this bit is set, then the kernel does not grant capabilities when a set-user-ID-root program is executed, or when a process with an effective or real UID of 0 calls .BR execve (2). (See the subsection .IR "Capabilities and execution of programs by root" .) .PP Each of the above "base" flags has a companion "locked" flag. Setting any of the "locked" flags is irreversible, and has the effect of preventing further changes to the corresponding "base" flag. The locked flags are: .BR SECBIT_KEEP_CAPS_LOCKED , .BR SECBIT_NO_SETUID_FIXUP_LOCKED , and .BR SECBIT_NOROOT_LOCKED . .PP The .I securebits flags can be modified and retrieved using the .BR prctl (2) .B PR_SET_SECUREBITS and .B PR_GET_SECUREBITS operations. The .B CAP_SETPCAP capability is required to modify the flags. The .I securebits flags are inherited by child processes. During an .BR execve (2), all of the flags are preserved, except .B SECBIT_KEEP_CAPS which is always cleared. An application can use the following call to lock itself, and all of its descendants, into an environment where the only way of gaining capabilities is by executing a program with associated file capabilities: .RS .nf prctl(PR_SET_SECUREBITS, SECBIT_KEEP_CAPS_LOCKED | SECBIT_NO_SETUID_FIXUP | SECBIT_NO_SETUID_FIXUP_LOCKED | SECBIT_NOROOT | SECBIT_NOROOT_LOCKED); .fi .RE .SH "CONFORMING TO" .PP No standards govern capabilities, but the Linux capability implementation is based on the withdrawn POSIX.1e draft standard; see .IR http://wt.xpilot.org/publications/posix.1e/ . .SH NOTES Since kernel 2.5.27, capabilities are an optional kernel component, and can be enabled/disabled via the CONFIG_SECURITY_CAPABILITIES kernel configuration option. The .I /proc/PID/task/TID/status file can be used to view the capability sets of a thread. The .I /proc/PID/status file shows the capability sets of a process's main thread. The .I libcap package provides a suite of routines for setting and getting capabilities that is more comfortable and less likely to change than the interface provided by .BR capset (2) and .BR capget (2). This package also provides the .BR setcap (8) and .BR getcap (8) programs. It can be found at .br .IR http://www.kernel.org/pub/linux/libs/security/linux-privs . Before kernel 2.6.24, and since kernel 2.6.24 if file capabilities are not enabled, a thread with the .B CAP_SETPCAP capability can manipulate the capabilities of threads other than itself. However, this is only theoretically possible, since no thread ever has .BR CAP_SETPCAP in either of these cases: .IP * 2 In the pre-2.6.25 implementation the system-wide capability bounding set, .IR /proc/sys/kernel/cap-bound , always masks out this capability, and this cannot be changed without modifying the kernel source and rebuilding. .IP * If file capabilities are disabled in the current implementation, then .B init starts out with this capability removed from its per-process bounding set, and that bounding set is inherited by all other processes created on the system. 
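.PP
As a brief illustration of the
.I libcap
interface mentioned above (a minimal sketch; link with \-lcap), the following program prints the capability sets of the calling thread in text form:
.RS
.nf
#include <stdio.h>
#include <sys/capability.h>

int
main(void)
{
    cap_t caps = cap_get_proc();   /* capability sets of this thread */
    char *text = cap_to_text(caps, NULL);

    puts(text);                    /* textual form, e.g. "=ep" */
    cap_free(text);
    cap_free(caps);
    return 0;
}
.fi
.RE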
.SH "SEE ALSO" .BR capget (2), .BR prctl (2), .BR setfsuid (2), .BR cap_clear (3), .BR cap_copy_ext (3), .BR cap_from_text (3), .BR cap_get_file (3), .BR cap_get_proc (3), .BR cap_init (3), .BR capgetp (3), .BR capsetp (3), .BR credentials (7), .BR pthreads (7), .BR getcap (8), .BR setcap (8) .PP .I include/linux/capability.h in the kernel source .SH COLOPHON This page is part of release 3.35 of the Linux .I man-pages project. A description of the project, and information about reporting bugs, can be found at http://man7.org/linux/man-pages/. doclifter-2.11/tests/corosync.conf.man0000664000175000017500000005470412152465736016225 0ustar esresr.\"/* .\" * Copyright (c) 2005 MontaVista Software, Inc. .\" * Copyright (c) 2006-2010 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Steven Dake (sdake@redhat.com) .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of the MontaVista Software, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ .TH COROSYNC_CONF 5 2006-03-28 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME corosync.conf - corosync executive configuration file .SH SYNOPSIS /etc/corosync.conf .SH DESCRIPTION The corosync.conf instructs the corosync executive about various parameters needed to control the corosync executive. Empty lines and lines starting with # character are ignored. The configuration file consists of bracketed top level directives. The possible directive choices are: .TP totem { } This top level directive contains configuration options for the totem protocol. .TP logging { } This top level directive contains configuration options for logging. .TP event { } This top level directive contains configuration options for the event service. .PP .PP It is also possible to specify the top level parameter .B compatibility. This directive indicates the level of compatibility requested by the user. The option whitetank can be specified to remain backward compatable with openais-0.80.z. 
The option none can be specified to only be compatible with corosync-1.Y.Z. Extra processing during configuration changes is required to remain backward compatible. The default is whitetank (backwards compatibility). .PP .PP Within the .B totem directive, an interface directive is required. There is also one configuration option which is required: .PP .PP Within the .B interface sub-directive of totem there are four parameters which are required. There is one parameter which is optional. .TP ringnumber This specifies the ring number for the interface. When using the redundant ring protocol, each interface should specify separate ring numbers to uniquely identify to the membership protocol which interface to use for which redundant ring. The ringnumber must start at 0. .TP bindnetaddr This specifies the network address the corosync executive should bind to. For example, if the local interface is 192.168.5.92 with netmask 255.255.255.0, set bindnetaddr to 192.168.5.0. If the local interface is 192.168.5.92 with netmask 255.255.255.192, set bindnetaddr to 192.168.5.64, and so forth. This may also be an IPV6 address, in which case IPV6 networking will be used. In this case, the full address must be specified and there is no automatic selection of the network interface within a specific subnet as with IPv4. If IPv6 networking is used, the nodeid field must be specified. .TP broadcast This is optional and can be set to yes. If it is set to yes, the broadcast address will be used for communication. If this option is set, mcastaddr should not be set. .TP mcastaddr This is the multicast address used by the corosync executive. The default should work for most networks, but the network administrator should be queried about a multicast address to use. Avoid 224.x.x.x because this is a "config" multicast address. This may also be an IPV6 multicast address, in which case IPV6 networking will be used. If IPv6 networking is used, the nodeid field must be specified. .TP mcastport This specifies the UDP port number. It is possible to use the same multicast address on a network with the corosync services configured for different UDP ports. Please note that corosync uses two UDP ports: mcastport (for mcast receives) and mcastport - 1 (for mcast sends). If you have multiple clusters on the same network using the same mcastaddr, please configure the mcastports with a gap. .TP ttl This specifies the Time To Live (TTL). If you run your cluster on a routed network then the default of "1" will be too small. This option provides a way to increase this up to 255. The valid range is 0..255. Note that this is only valid on multicast transport types. .TP member This specifies a member on the interface and is used with the udpu transport only. Every node that should be a member of the membership should be specified as a separate member directive. Within the member directive there is a parameter memberaddr which specifies the IP address of one of the nodes. .PP .PP Within the .B totem directive, there are seven configuration options of which one is required, five are optional, and one is required when IPV6 is configured in the interface subdirective. The required directive controls the version of the totem configuration. The directive that is optional unless IPv6 is in use controls identification of the processor. The optional options control secrecy and authentication, the redundant ring mode of operation, maximum network MTU, number of sending threads, and the nodeid field.
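.PP
For orientation, the following fragment sketches a minimal
.B totem
stanza with a single IPv4 ring; the addresses and port are placeholders,
and the individual options are described in the remainder of this section:
.RS
.nf
totem {
        version: 2
        secauth: off
        interface {
                ringnumber: 0
                bindnetaddr: 192.168.5.0
                mcastaddr: 226.94.1.1
                mcastport: 5405
        }
}
.fi
.RE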
.TP version This specifies the version of the configuration file. Currently the only valid version for this directive is 2. .PP .PP .TP nodeid This configuration option is optional when using IPv4 and required when using IPv6. This is a 32 bit value specifying the node identifier delivered to the cluster membership service. If this is not specified with IPv4, the node id will be determined from the 32 bit IP address to which the system is bound with ring identifier of 0. The node identifier value of zero is reserved and should not be used. .TP clear_node_high_bit This configuration option is optional and is only relevant when no nodeid is specified. Some openais clients require a signed 32 bit nodeid that is greater than zero; however, by default openais uses all 32 bits of the IPv4 address space when generating a nodeid. Set this option to yes to force the high bit to be zero and therefore ensure the nodeid is a positive signed 32 bit integer. WARNING: The cluster's behavior is undefined if this option is enabled on only a subset of the cluster (for example during a rolling upgrade). .TP secauth This specifies that HMAC/SHA1 authentication should be used to authenticate all messages. It further specifies that all data should be encrypted with the sober128 encryption algorithm to protect data from eavesdropping. Enabling this option adds a 36 byte header to every message sent by totem which reduces total throughput. Encryption and authentication consume 75% of CPU cycles in aisexec as measured with gprof when enabled. For 100mbit networks with 1500 MTU frame transmissions: A throughput of 9mb/sec is possible with 100% cpu utilization when this option is enabled on 3ghz cpus. A throughput of 10mb/sec is possible with 20% cpu utilization when this option is disabled on 3ghz cpus. For gig-e networks with large frame transmissions: A throughput of 20mb/sec is possible when this option is enabled on 3ghz cpus. A throughput of 60mb/sec is possible when this option is disabled on 3ghz cpus. The default is on. .TP rrp_mode This specifies the mode of redundant ring, which may be none, active, or passive. Active replication offers slightly lower latency from transmit to delivery in faulty network environments but with less performance. Passive replication may nearly double the speed of the totem protocol if the protocol doesn't become cpu bound. The final option is none, in which case only one network interface will be used to operate the totem protocol. If only one interface directive is specified, none is automatically chosen. If multiple interface directives are specified, only active or passive may be chosen. .TP netmtu This specifies the network maximum transmit unit. Setting this value beyond 1500, the regular frame MTU, requires Ethernet devices that support large (also called jumbo) frames. If any device in the network doesn't support large frames, the protocol will not operate properly. The hosts must also have their mtu size set from 1500 to whatever frame size is specified here. Please note that while some NICs or switches claim large frame support, they support 9000 MTU as the maximum frame size including the IP header. Setting the netmtu and host MTUs to 9000 will cause totem to use the full 9000 bytes of the frame. Then Linux will add an 18 byte header moving the full frame size to 9018. As a result some hardware will not operate properly with this size of data. A netmtu of 8982 seems to work for the few large frame devices that have been tested.
Some manufacturers claim large frame support when in fact they support frame sizes of 4500 bytes. Increasing the MTU from 1500 to 8982 doubles throughput performance from 30MB/sec to 60MB/sec as measured with evsbench with 175000 byte messages with the secauth directive set to off. When sending multicast traffic, if the network frequently reconfigures, chances are that some device in the network doesn't support large frames. Choose hardware carefully if intending to use large frame support. The default is 1500. .TP threads This directive controls how many threads are used to encrypt and send multicast messages. If secauth is off, the protocol will never use threaded sending. If secauth is on, this directive allows systems to be configured to use multiple threads to encrypt and send multicast messages. A thread directive of 0 indicates that no threaded send should be used. This mode offers best performance for non-SMP systems. The default is 0. .TP vsftype This directive controls the virtual synchrony filter type used to identify a primary component. The preferred choice is YKD dynamic linear voting; however, for clusters larger than 32 nodes YKD consumes a lot of memory. For large scale clusters that are created by changing the MAX_PROCESSORS_COUNT #define in the C code totem.h file, the virtual synchrony filter "none" is recommended but then AMF and DLCK services (which are currently experimental) are not safe for use. The default is ykd. The vsftype can also be set to none. .TP transport This directive controls the transport mechanism used. If the interface to which corosync is binding is an RDMA interface such as RoCEE or Infiniband, the "iba" parameter may be specified. To avoid the use of multicast entirely, a unicast transport parameter "udpu" can be specified. This requires specifying the list of members that could potentially make up the membership before deployment. The default is udp. The transport type can also be set to udpu or iba. Within the .B totem directive, there are several configuration options which are used to control the operation of the protocol. It is generally not recommended to change any of these values without proper guidance and sufficient testing. Some networks may require larger values if suffering from frequent reconfigurations. Some applications may require faster failure detection times which can be achieved by reducing the token timeout. .TP token This timeout specifies in milliseconds how long to wait without receiving a token before a token loss is declared. This is the time spent detecting a failure of a processor in the current configuration. Reforming a new configuration takes about 50 milliseconds in addition to this timeout. The default is 1000 milliseconds. .TP token_retransmit This timeout specifies in milliseconds how long to wait for a token before the token is retransmitted. This will be automatically calculated if token is modified. It is not recommended to alter this value without guidance from the corosync community. The default is 238 milliseconds. .TP hold This timeout specifies in milliseconds how long the token should be held by the representative when the protocol is under low utilization. It is not recommended to alter this value without guidance from the corosync community. The default is 180 milliseconds. .TP token_retransmits_before_loss_const This value identifies how many token retransmits should be attempted before forming a new configuration.
If this value is set, retransmit and hold will be automatically calculated from retransmits_before_loss and token. The default is 4 retransmissions. .TP join This timeout specifies in milliseconds how long to wait for join messages in the membership protocol. The default is 50 milliseconds. .TP send_join This timeout specifies in milliseconds an upper range between 0 and send_join to wait before sending a join message. For configurations with fewer than 32 nodes, this parameter is not necessary. For larger rings, this parameter is necessary to ensure the NIC is not overflowed with join messages on formation of a new ring. A reasonable value for large rings (128 nodes) would be 80msec. Other timer values must also change if this value is changed. Seek advice from the corosync mailing list if trying to run larger configurations. The default is 0 milliseconds. .TP consensus This timeout specifies in milliseconds how long to wait for consensus to be achieved before starting a new round of membership configuration. The minimum value for consensus must be 1.2 * token. This value will be automatically calculated at 1.2 * token if the user doesn't specify a consensus value. For two node clusters, a consensus larger than the join timeout but less than token is safe. For three node or larger clusters, consensus should be larger than token. There is an increasing risk of odd membership changes, which still guarantee virtual synchrony, as node count grows if consensus is less than token. The default is 1200 milliseconds. .TP merge This timeout specifies in milliseconds how long to wait before checking for a partition when no multicast traffic is being sent. If multicast traffic is being sent, the merge detection happens automatically as a function of the protocol. The default is 200 milliseconds. .TP downcheck This timeout specifies in milliseconds how long to wait before checking that a network interface is back up after it has been downed. The default is 1000 milliseconds. .TP fail_recv_const This constant specifies how many rotations of the token without receiving any of the messages, when messages should be received, may occur before a new configuration is formed. The default is 2500 failures to receive a message. .TP seqno_unchanged_const This constant specifies how many rotations of the token without any multicast traffic should occur before the merge detection timeout is started. The default is 30 rotations. .TP heartbeat_failures_allowed [HeartBeating mechanism] Configures the optional HeartBeating mechanism for faster failure detection. Keep in mind that engaging this mechanism in lossy networks could cause faulty loss declaration as the mechanism relies on the network for heartbeating. So, as a rule of thumb, use this mechanism if you require improved failure detection in low to medium utilized networks. This constant specifies the number of heartbeat failures the system should tolerate before declaring heartbeat failure, e.g., 3. Also, if this value is not set or is 0, then the heartbeat mechanism is not engaged in the system and token rotation is the method of failure detection. The default is 0 (disabled). .TP max_network_delay [HeartBeating mechanism] This constant specifies in milliseconds the approximate delay that your network takes to transport one packet from one machine to another. This value is to be set by system engineers; please don't change it if not sure, as this affects the failure detection mechanism using heartbeat. The default is 50 milliseconds.
.TP window_size This constant specifies the maximum number of messages that may be sent on one token rotation. If all processors perform equally well, this value could be large (300), which would introduce higher latency from origination to delivery for very large rings. To reduce latency in large rings (16+), the defaults are a safe compromise. If one or more slow processors are present among fast processors, window_size should be no larger than 256000 / netmtu to avoid overflow of the kernel receive buffers. The user is notified of this by the display of a retransmit list in the notification logs. There is no loss of data, but performance is reduced when these errors occur. The default is 50 messages. .TP max_messages This constant specifies the maximum number of messages that may be sent by one processor on receipt of the token. The max_messages parameter is limited to 256000 / netmtu to prevent overflow of the kernel transmit buffers. The default is 17 messages. .TP miss_count_const This constant defines the maximum number of times on receipt of a token a message is checked for retransmission before a retransmission occurs. This parameter is useful to modify for switches that delay multicast packets compared to unicast packets. The default setting works well for nearly all modern switches. The default is 5 messages. .TP rrp_problem_count_timeout This specifies the time in milliseconds to wait before decrementing the problem count by 1 for a particular ring to ensure a link is not marked faulty for transient network failures. The default is 2000 milliseconds. .TP rrp_problem_count_threshold This specifies the number of times a problem is detected with a link before setting the link faulty. Once a link is set faulty, no more data is transmitted upon it. Also, the problem counter is no longer decremented when the problem count timeout expires. A problem is detected whenever all tokens from the preceding processor have not been received within the rrp_token_expired_timeout. The rrp_problem_count_threshold * rrp_token_expired_timeout should be at least 50 milliseconds less than the token timeout, or a complete reconfiguration may occur. The default is 10 problem counts. .TP rrp_problem_count_mcast_threshold This specifies the number of times a problem is detected with multicast before setting the link faulty for passive rrp mode. This variable is unused in active rrp mode. The default is 10 times rrp_problem_count_threshold. .TP rrp_token_expired_timeout This specifies the time in milliseconds to increment the problem counter for the redundant ring protocol after not having received a token from all rings for a particular processor. This value will automatically be calculated from the token timeout and problem_count_threshold but may be overridden. It is not recommended to override this value without guidance from the corosync community. The default is 47 milliseconds. .TP rrp_autorecovery_check_timeout This specifies the time in milliseconds to check if the failed ring can be auto-recovered. The default is 1000 milliseconds. .PP Within the .B logging directive, there are several configuration options which are all optional. .PP The following 3 options are valid only for the top level logging directive: .TP timestamp This specifies that a timestamp is placed on all log messages. The default is off. .TP fileline This specifies that file and line should be printed. The default is off. .TP function_name This specifies that the code function name should be printed. The default is off.
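.PP
As an illustration, the three options above could be set in a
.B logging
stanza such as the following (the values are placeholders; the remaining
logging options described below are added in the same key: value style):
.RS
.nf
logging {
        timestamp: on
        fileline: off
        function_name: off
}
.fi
.RE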
.PP The following options are valid both for the top level logging directive and can be overridden in logger_subsys entries. .TP to_stderr .TP to_logfile .TP to_syslog These specify the destination of logging output. Any combination of these options may be specified. Valid options are .B yes and .B no. The default is syslog and stderr. Please note, if you are using to_logfile and want to rotate the file, use logrotate(8) with the option .B copytruncate. e.g.: .ne 18 .RS .nf .ft CW /var/log/corosync.log { missingok compress notifempty daily rotate 7 copytruncate } .ft .fi .RE .TP logfile If the .B to_logfile directive is set to .B yes , this option specifies the pathname of the log file. No default. .TP logfile_priority This specifies the logfile priority for this particular subsystem. Ignored if debug is on. Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning. The default is: info. .TP syslog_facility This specifies the syslog facility type that will be used for any messages sent to syslog. Options are daemon, local0, local1, local2, local3, local4, local5, local6 & local7. The default is daemon. .TP syslog_priority This specifies the syslog level for this particular subsystem. Ignored if debug is on. Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning. The default is: info. .TP debug This specifies whether debug output is logged for this particular logger. The default is off. .TP tags This specifies which tags should be traced for this particular logger. Set debug directive to .B on in order to enable tracing using tags. Values are specified using a vertical bar as a logical OR separator: enter|leave|trace1|trace2|trace3|... The default is none. .PP Within the .B logging directive, logger_subsys directives are optional. .PP Within the .B logger_subsys sub-directive, all of the above logging configuration options are valid and can be used to override the default settings. The subsys entry, described below, is mandatory to identify the subsystem. .TP subsys This specifies the subsystem identity (name) for which logging is specified. This is the name used by a service in the log_init () call. E.g. 'CKPT'. This directive is required. .SH "FILES" .TP /etc/corosync.conf The corosync executive configuration file. .SH "SEE ALSO" .BR corosync_overview (8), .BR logrotate (8) .PP doclifter-2.11/tests/groff_char.man0000664000175000017500000010437012152465736015535 0ustar esresr.TH GROFF_CHAR 7 "01 April 2012" "Groff Version 1.21" .SH NAME groff_char \- groff glyph names .SH DESCRIPTION .\" The lines above were designed to satisfy `apropos'. . .\" For best results, format this document with `groff' (GNU roff). . . .\" -------------------------------------------------------------------- .\" Legal terms .\" -------------------------------------------------------------------- . .ig groff_char(7) This file is part of groff (GNU roff). File position: /man/groff_char.man Copyright (C) 1989-2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. written by Werner Lemberg with additions by Bernd Warken Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with the Invariant Sections being this .ig-section and AUTHOR, with no Front-Cover Texts, and with no Back-Cover Texts.
A copy of the Free Documentation License is included as a file called FDL in the main directory of the groff source package. .. .ig A copy of the GNU Free Documentation License is also available in this Debian package as /usr/share/doc/groff/copyright. .. . .\" -------------------------------------------------------------------- .\" Setup .\" -------------------------------------------------------------------- . .do nr groff_char_C \n[.C] .cp 0 . .\" groff only .\".if \n(.g .ne 2v .\".if \n(.g .sv 2v . .ds aq \(aq . .\" non-groff .if !\n(.g .if '\(aq'' .ds aq \' . .nr Sp 2n . .do if !r ECFONTS .do fspecial CR R . . .\" -------------------------------------------------------------------- .\" .SH DESCRIPTION .\" -------------------------------------------------------------------- . This manual page lists the standard .B groff glyph names and the default input mapping, \%latin1. . The glyphs in this document look different depending on which output device was chosen (with option .B \-T for the .BR man (1) program or the roff formatter). . Glyphs not available for the device that is being used to print or view this manual page are marked with .ie \n(.g `(N/A)'; the device currently used is `\*(.T'. .el `(N/A)'. . . .P In the actual version, .B groff provides only \%8-bit characters for direct input and named entities for further glyphs. . On ASCII platforms, input character codes in the range 0 to 127 (decimal) represent the usual \%7-bit ASCII characters, while codes between 127 and 255 are interpreted as the corresponding characters in the .I \%latin1 .RI ( \%ISO-8859-1 ) code set by default. . This mapping is contained in the file \f(CWlatin1.tmac\fP and can be changed by loading a different input encoding. . Note that some of the input characters are reserved by .BR groff , either for internal use or for special input purposes. . On EBCDIC platforms, only code page .I cp1047 is supported (which contains the same characters as \%latin1; the input encoding file is called \f(CWcp1047.tmac\fP). . Again, some input characters are reserved for internal and special purposes. . . .P All roff systems provide the concept of named glyphs. . In traditional roff systems, only names of length\ 2 were used, while groff also provides support for longer names. . It is strongly suggested that only named glyphs are used for all character representations outside of the printable \%7-bit ASCII range. . . .P Some of the predefined groff escape sequences (with names of length\ 1) also produce single glyphs; these exist for historical reasons or are printable versions of syntactical characters. . They include `\f(CW\e\e\fP', `\f(CW\e\'\fP', `\f(CW\e`\fP', `\f(CW\e-\fP', `\f(CW\e.\fP', and `\f(CW\ee\fP'; see .BR groff (7). . . .P In groff, all of these different types of characters and glyphs can be tested positively with the `\f(CW.if\ c\fP' conditional. . . .\" -------------------------------------------------------------------- .SH REFERENCE .\" -------------------------------------------------------------------- . In this section, the glyphs in groff are specified in tabular form. . The meaning of the columns is as follows. . . .TP .I "Output" shows how the glyph is printed for the current device; although this can have quite a different shape on other devices, it always represents the same glyph. . . .TP .I "Input" specifies how the glyph is input either directly by a key on the keyboard, or by a groff escape sequence. . . 
.TP .I "Code" applies to glyphs which can be input with a single character, and gives the ISO \%latin1 decimal code of that input character. . Note that this code is equivalent to the lowest 256 Unicode characters, including \%7-bit ASCII in the range 0 to\ 127. . . .TP .I "PostScript" gives the usual PostScript name of the glyph. . . .TP .I "Unicode" is the glyph name used in composite glyph names. . . . .\" -------------------------------------------------------------------- .SS "7-bit Character Codes 32-126" .\" -------------------------------------------------------------------- . These are the basic glyphs having 7-bit ASCII code values assigned. . They are identical to the printable characters of the character standards \%ISO-8859-1 (\%latin1) and Unicode (range .IR "Basic Latin" ). . The glyph names used in composite glyph names are `u0020' up to `u007E'. . . .P Note that input characters in the range \%0\-31 and character 127 are .I not printable characters. . Most of them are invalid input characters for .B groff anyway, and the valid ones have special meaning. . For EBCDIC, the printable characters are in the range \%66\-255. . . .TP 48\-57 Decimal digits 0 to\ 9 (print as themselves). . . .TP 65\-90 Upper case letters A\-Z (print as themselves). . . .TP 97\-122 Lower case letters a\-z (print as themselves). . . .P Most of the remaining characters not in the just described ranges print as themselves; the only exceptions are the following characters: . . .TP .B \` the ISO \%latin1 `Grave Accent' (code\ 96) prints as `, a left single quotation mark; the original character can be obtained with `\f(CW\e`\fP'. . . .TP .B \*(aq the ISO \%latin1 `Apostrophe' (code\ 39) prints as ', a right single quotation mark; the original character can be obtained with `\f(CW\e(aq\fP'. . . .TP .B - the ISO \%latin1 `Hyphen, Minus Sign' (code\ 45) prints as a hyphen; a minus sign can be obtained with `\f(CW\e-\fP'. . . .TP .B ~ the ISO \%latin1 `Tilde' (code\ 126) is reduced in size to be usable as a diacritic; a larger glyph can be obtained with `\f(CW\e(ti\fP'. . . .TP .B ^ the ISO \%latin1 `Circumflex Accent' (code\ 94) is reduced in size to be usable as a diacritic; a larger glyph can be obtained with `\f(CW\e(ha\fP'. . . .P .TS l l l l l. 
Output Input Code PostScript Unicode Notes _ \[char33] \[char33] 33 exclam u0021 \[char34] \[char34] 34 quotedbl u0022 \[char35] \[char35] 35 numbersign u0023 \[char36] \[char36] 36 dollar u0024 \[char37] \[char37] 37 percent u0025 \[char38] \[char38] 38 ampersand u0026 \[char39] \[char39] 39 quoteright u0027 \[char40] \[char40] 40 parenleft u0028 \[char41] \[char41] 41 parenright u0029 \[char42] \[char42] 42 asterisk u002A \[char43] \[char43] 43 plus u002B \[char44] \[char44] 44 comma u002C \[char45] \[char45] 45 hyphen u2010 \[char46] \[char46] 46 period u002E \[char47] \[char47] 47 slash u002F \[char58] \[char58] 58 colon u003A \[char59] \[char59] 59 semicolon u003B \[char60] \[char60] 60 less u003C \[char61] \[char61] 61 equal u003D \[char62] \[char62] 62 greater u003E \[char63] \[char63] 63 question u003F \[char64] \[char64] 64 at u0040 \[char91] \[char91] 91 bracketleft u005B \[char92] \[char92] 92 backslash u005C \[char93] \[char93] 93 bracketright u005D \[char94] \[char94] 94 circumflex u005E circumflex accent \[char95] \[char95] 95 underscore u005F \[char96] \[char96] 96 quoteleft u0060 \[char123] \[char123] 123 braceleft u007B \[char124] \[char124] 124 bar u007C \[char125] \[char125] 125 braceright u007D \[char126] \[char126] 126 tilde u007E tilde accent .TE . . .\" -------------------------------------------------------------------- .SS "8-bit Character Codes 160 to 255" .\" -------------------------------------------------------------------- . They are interpreted as printable characters according to the .I latin1 .RI ( ISO-8859-1 ) code set, being identical to the Unicode range .IR "Latin-1 Supplement" . . . .P Input characters in range 128-159 (on non-EBCDIC hosts) are not printable characters. . . .TP 160 . the ISO \%latin1 .I no-break space is mapped to `\f(CW\e~\fP', the stretchable space character. . . .TP 173 . the soft hyphen control character. . .B groff never uses this character for output (thus it is omitted in the table below); the input character\ 173 is mapped onto `\f(CW\e%\fP'. . . .P The remaining ranges (\%161\-172, \%174\-255) are printable characters that print as themselves. . Although they can be specified directly with the keyboard on systems with a \%latin1 code page, it is better to use their glyph names; see next section. . .P .TS l l l l l. 
Output Input Code PostScript Unicode Notes _ \[char161] \[char161] 161 exclamdown u00A1 inverted exclamation mark \[char162] \[char162] 162 cent u00A2 \[char163] \[char163] 163 sterling u00A3 \[char164] \[char164] 164 currency u00A4 \[char165] \[char165] 165 yen u00A5 \[char166] \[char166] 166 brokenbar u00A6 \[char167] \[char167] 167 section u00A7 \[char168] \[char168] 168 dieresis u00A8 \[char169] \[char169] 169 copyright u00A9 \[char170] \[char170] 170 ordfeminine u00AA \[char171] \[char171] 171 guillemotleft u00AB \[char172] \[char172] 172 logicalnot u00AC \[char174] \[char174] 174 registered u00AE \[char175] \[char175] 175 macron u00AF \[char176] \[char176] 176 degree u00B0 \[char177] \[char177] 177 plusminus u00B1 \[char178] \[char178] 178 twosuperior u00B2 \[char179] \[char179] 179 threesuperior u00B3 \[char180] \[char180] 180 acute u00B4 acute accent \[char181] \[char181] 181 mu u00B5 micro sign \[char182] \[char182] 182 paragraph u00B6 \[char183] \[char183] 183 periodcentered u00B7 \[char184] \[char184] 184 cedilla u00B8 \[char185] \[char185] 185 onesuperior u00B9 \[char186] \[char186] 186 ordmasculine u00BA \[char187] \[char187] 187 guillemotright u00BB \[char188] \[char188] 188 onequarter u00BC \[char189] \[char189] 189 onehalf u00BD \[char190] \[char190] 190 threequarters u00BE \[char191] \[char191] 191 questiondown u00BF \[char192] \[char192] 192 Agrave u0041_0300 \[char193] \[char193] 193 Aacute u0041_0301 \[char194] \[char194] 194 Acircumflex u0041_0302 \[char195] \[char195] 195 Atilde u0041_0303 \[char196] \[char196] 196 Adieresis u0041_0308 \[char197] \[char197] 197 Aring u0041_030A \[char198] \[char198] 198 AE u00C6 \[char199] \[char199] 199 Ccedilla u0043_0327 \[char200] \[char200] 200 Egrave u0045_0300 \[char201] \[char201] 201 Eacute u0045_0301 \[char202] \[char202] 202 Ecircumflex u0045_0302 \[char203] \[char203] 203 Edieresis u0045_0308 \[char204] \[char204] 204 Igrave u0049_0300 \[char205] \[char205] 205 Iacute u0049_0301 \[char206] \[char206] 206 Icircumflex u0049_0302 \[char207] \[char207] 207 Idieresis u0049_0308 \[char208] \[char208] 208 Eth u00D0 \[char209] \[char209] 209 Ntilde u004E_0303 \[char210] \[char210] 210 Ograve u004F_0300 \[char211] \[char211] 211 Oacute u004F_0301 \[char212] \[char212] 212 Ocircumflex u004F_0302 \[char213] \[char213] 213 Otilde u004F_0303 \[char214] \[char214] 214 Odieresis u004F_0308 \[char215] \[char215] 215 multiply u00D7 \[char216] \[char216] 216 Oslash u00D8 \[char217] \[char217] 217 Ugrave u0055_0300 \[char218] \[char218] 218 Uacute u0055_0301 \[char219] \[char219] 219 Ucircumflex u0055_0302 \[char220] \[char220] 220 Udieresis u0055_0308 \[char221] \[char221] 221 Yacute u0059_0301 \[char222] \[char222] 222 Thorn u00DE \[char223] \[char223] 223 germandbls u00DF \[char224] \[char224] 224 agrave u0061_0300 \[char225] \[char225] 225 aacute u0061_0301 \[char226] \[char226] 226 acircumflex u0061_0302 \[char227] \[char227] 227 atilde u0061_0303 \[char228] \[char228] 228 adieresis u0061_0308 \[char229] \[char229] 229 aring u0061_030A \[char230] \[char230] 230 ae u00E6 \[char231] \[char231] 231 ccedilla u0063_0327 \[char232] \[char232] 232 egrave u0065_0300 \[char233] \[char233] 233 eacute u0065_0301 \[char234] \[char234] 234 ecircumflex u0065_0302 \[char235] \[char235] 235 edieresis u0065_0308 \[char236] \[char236] 236 igrave u0069_0300 \[char237] \[char237] 237 iacute u0069_0301 \[char238] \[char238] 238 icircumflex u0069_0302 \[char239] \[char239] 239 idieresis u0069_0308 \[char240] \[char240] 240 eth u00F0 \[char241] \[char241] 241 
ntilde u006E_0303 \[char242] \[char242] 242 ograve u006F_0300 \[char243] \[char243] 243 oacute u006F_0301 \[char244] \[char244] 244 ocircumflex u006F_0302 \[char245] \[char245] 245 otilde u006F_0303 \[char246] \[char246] 246 odieresis u006F_0308 \[char247] \[char247] 247 divide u00F7 \[char248] \[char248] 248 oslash u00F8 \[char249] \[char249] 249 ugrave u0075_0300 \[char250] \[char250] 250 uacute u0075_0301 \[char251] \[char251] 251 ucircumflex u0075_0302 \[char252] \[char252] 252 udieresis u0075_0308 \[char253] \[char253] 253 yacute u0079_0301 \[char254] \[char254] 254 thorn u00FE \[char255] \[char255] 255 ydieresis u0079_0308 .TE . . .\" -------------------------------------------------------------------- .SS "Named Glyphs" .\" -------------------------------------------------------------------- . Glyph names can be embedded into the document text by using escape sequences. . .BR groff (7) describes how these escape sequences look. . Glyph names can consist of quite arbitrary characters from the ASCII or \%latin1 code set, not only alphanumeric characters. . Here are some examples: . .TP \f(CW\e(\fP\fIch\fP A glyph having the 2-character name .IR ch . . .TP \f(CW\e[\fP\fIchar_name\fP\f(CW]\fP A glyph having the name .I char_name (having length 1, 2, 3, .\|.\|.). . Note that `\fIc\fP' is not the same as `\f(CW\e[\fP\fIc\fP\f(CW]\fP' (\fIc\fP\ a single character): The latter is internally mapped to glyph name `\e\fIc\fP'. . By default, groff defines a single glyph name starting with a backslash, namely \%`\e-', which can be either accessed as `\f(CW\e\-\fP' or `\f(CW\e[-]\fP'. . .TP \f(CW\e[\fP\fIbase_glyph composite_1 composite_2 .\|.\|.\fP\f(CW]\fP A composite glyph; see below for a more detailed description. . . .P In groff, each \%8-bit input character can also be referred to by the construct `\f(CW\e[char\fP\fIn\fP\f(CW]\fP' where .I n is the decimal code of the character, a number between 0 and\ 255 without leading zeros (those entities are .I not glyph names). . They are normally mapped onto glyphs using the \f(CW.trin\fP request. . Another special convention is the handling of glyphs with names directly derived from a Unicode code point; this is discussed below. . Moreover, new glyph names can be created by the \f(CW.char\fP request; see .BR groff (7). . .P In the following, a plus sign in the `Notes' column indicates that this particular glyph name appears in the PS version of the original troff documentation, CSTR\ 54. . .P Entries marked with `***' denote glyphs for mathematical purposes (mainly used for DVI output). Normally, such glyphs have metrics which make them unusable in normal text. . . .P .TS l l l l l. Output Input PostScript Unicode Notes _ \[-D] \e[-D] Eth u00D0 uppercase eth \[Sd] \e[Sd] eth u00F0 lowercase eth \[TP] \e[TP] Thorn u00DE uppercase thorn \[Tp] \e[Tp] thorn u00FE lowercase thorn \[ss] \e[ss] germandbls u00DF German sharp s .TE . .P .I Ligatures and Other Latin Glyphs .P .TS l l l l l.
Output Input PostScript Unicode Notes _ \[ff] \e[ff] ff u0066_0066 ff ligature + \[fi] \e[fi] fi u0066_0069 fi ligature + \[fl] \e[fl] fl u0066_006C fl ligature + \[Fi] \e[Fi] ffi u0066_0066_0069 ffi ligature + \[Fl] \e[Fl] ffl u0066_0066_006C ffl ligature + \[/L] \e[/L] Lslash u0141 (Polish) \[/l] \e[/l] lslash u0142 (Polish) \[/O] \e[/O] Oslash u00D8 (Scandinavian) \[/o] \e[/o] oslash u00F8 (Scandinavian) \[AE] \e[AE] AE u00C6 \[ae] \e[ae] ae u00E6 \[OE] \e[OE] OE u0152 \[oe] \e[oe] oe u0153 \[IJ] \e[IJ] IJ u0132 (Dutch) \[ij] \e[ij] ij u0133 (Dutch) \[.i] \e[.i] dotlessi u0131 (Turkish) \[.j] \e[.j] dotlessj --- j without a dot .TE . .P .I Accented Characters .P .TS l l l l l. Output Input PostScript Unicode Notes _ \['A] \e['A] Aacute u0041_0301 \['C] \e['C] Cacute u0043_0301 \['E] \e['E] Eacute u0045_0301 \['I] \e['I] Iacute u0049_0301 \['O] \e['O] Oacute u004F_0301 \['U] \e['U] Uacute u0055_0301 \['Y] \e['Y] Yacute u0059_0301 \['a] \e['a] aacute u0061_0301 \['c] \e['c] cacute u0063_0301 \['e] \e['e] eacute u0065_0301 \['i] \e['i] iacute u0069_0301 \['o] \e['o] oacute u006F_0301 \['u] \e['u] uacute u0075_0301 \['y] \e['y] yacute u0079_0301 \[:A] \e[:A] Adieresis u0041_0308 A with umlaut \[:E] \e[:E] Edieresis u0045_0308 \[:I] \e[:I] Idieresis u0049_0308 \[:O] \e[:O] Odieresis u004F_0308 \[:U] \e[:U] Udieresis u0055_0308 \[:Y] \e[:Y] Ydieresis u0059_0308 \[:a] \e[:a] adieresis u0061_0308 \[:e] \e[:e] edieresis u0065_0308 \[:i] \e[:i] idieresis u0069_0308 \[:o] \e[:o] odieresis u006F_0308 \[:u] \e[:u] udieresis u0075_0308 \[:y] \e[:y] ydieresis u0079_0308 \[^A] \e[^A] Acircumflex u0041_0302 \[^E] \e[^E] Ecircumflex u0045_0302 \[^I] \e[^I] Icircumflex u0049_0302 \[^O] \e[^O] Ocircumflex u004F_0302 \[^U] \e[^U] Ucircumflex u0055_0302 \[^a] \e[^a] acircumflex u0061_0302 \[^e] \e[^e] ecircumflex u0065_0302 \[^i] \e[^i] icircumflex u0069_0302 \[^o] \e[^o] ocircumflex u006F_0302 \[^u] \e[^u] ucircumflex u0075_0302 \[`A] \e[`A] Agrave u0041_0300 \[`E] \e[`E] Egrave u0045_0300 \[`I] \e[`I] Igrave u0049_0300 \[`O] \e[`O] Ograve u004F_0300 \[`U] \e[`U] Ugrave u0055_0300 \[`a] \e[`a] agrave u0061_0300 \[`e] \e[`e] egrave u0065_0300 \[`i] \e[`i] igrave u0069_0300 \[`o] \e[`o] ograve u006F_0300 \[`u] \e[`u] ugrave u0075_0300 \[~A] \e[~A] Atilde u0041_0303 \[~N] \e[~N] Ntilde u004E_0303 \[~O] \e[~O] Otilde u004F_0303 \[~a] \e[~a] atilde u0061_0303 \[~n] \e[~n] ntilde u006E_0303 \[~o] \e[~o] otilde u006F_0303 \[vS] \e[vS] Scaron u0053_030C \[vs] \e[vs] scaron u0073_030C \[vZ] \e[vZ] Zcaron u005A_030C \[vz] \e[vz] zcaron u007A_030C \[,C] \e[,C] Ccedilla u0043_0327 \[,c] \e[,c] ccedilla u0063_0327 \[oA] \e[oA] Aring u0041_030A \[oa] \e[oa] aring u0061_030A .TE . .P .I Accents .P The .B composite request is used to map most of the accents to non-spacing glyph names; the values given in parentheses are the original (spacing) ones. . .P .TS l l l l l. Output Input PostScript Unicode Notes _ \[a"] \e[a"] hungarumlaut u030B (u02DD) (Hungarian) \[a-] \e[a-] macron u0304 (u00AF) \[a.] \e[a.] dotaccent u0307 (u02D9) \[a^] \e[a^] circumfle u0302 (u005E) \[aa] \e[aa] acute u0301 (u00B4) + \[ga] \e[ga] grave u0300 (u0060) + \[ab] \e[ab] breve u0306 (u02D8) \[ac] \e[ac] cedilla u0327 (u00B8) \[ad] \e[ad] dieresis u0308 (u00A8) umlaut \[ah] \e[ah] caron u030C (u02C7) \[ao] \e[ao] ring u030A (u02DA) circle \[a~] \e[a~] tilde u0303 (u007E) \[ho] \e[ho] ogonek u0328 (u02DB) hook \[ha] \e[ha] asciicircum u005E (spacing) \[ti] \e[ti] asciitilde u007E (spacing) .TE . .P .I Quotes .P .TS l l l l l. 
Output Input PostScript Unicode Notes _ \[Bq] \e[Bq] quotedblbase u201E low double comma quote \[bq] \e[bq] quotesinglbase u201A low single comma quote \[lq] \e[lq] quotedblleft u201C \[rq] \e[rq] quotedblright u201D \[oq] \e[oq] quoteleft u2018 single open quote \[cq] \e[cq] quoteright u2019 single closing quote \[aq] \e[aq] quotesingle u0027 apostrophe quote (ASCII 39) \[dq] \e[dq] quotedbl u0022 double quote (ASCII 34) \[Fo] \e[Fo] guillemotleft u00AB \[Fc] \e[Fc] guillemotright u00BB \[fo] \e[fo] guilsinglleft u2039 \[fc] \e[fc] guilsinglright u203A .TE . .P .I Punctuation .P .TS l l l l l. Output Input PostScript Unicode Notes _ \[r!] \e[r!] exclamdown u00A1 \[r?] \e[r?] questiondown u00BF \[em] \e[em] emdash u2014 + \[en] \e[en] endash u2013 \[hy] \e[hy] hyphen u2010 + .TE . .P .I Brackets .P The extensible bracket pieces are font-invariant glyphs. . In classical troff only one glyph was available to vertically extend brackets, braces, and parentheses: `bv'. . We map it rather arbitrarily to u23AA. . .P Note that not all devices contain extensible bracket pieces which can be piled up with `\f(CW\eb\fP' due to the restrictions of the escape's piling algorithm. . A general solution to build brackets out of pieces is the following macro: . .P .nf .RS .ft C \&.\e" Make a pile centered vertically 0.5em \&.\e" above the baseline. \&.\e" The first argument is placed at the top. \&.\e" The pile is returned in string `pile' \&.eo \&.de pile-make \&. nr pile-wd 0 \&. nr pile-ht 0 \&. ds pile-args \&. \&. nr pile-# \en[.$] \&. while \en[pile-#] \e{\e \&. nr pile-wd (\en[pile-wd] >? \ew'\e$[\en[pile-#]]') \&. nr pile-ht +(\en[rst] - \en[rsb]) \&. as pile-args \ev'\en[rsb]u'\e" \&. as pile-args \eZ'\e$[\en[pile-#]]'\e" \&. as pile-args \ev'-\en[rst]u'\e" \&. nr pile-# -1 \&. \e} \&. \&. ds pile \ev'(-0.5m + (\en[pile-ht]u / 2u))'\e" \&. as pile \e*[pile-args]\e" \&. as pile \ev'((\en[pile-ht]u / 2u) + 0.5m)'\e" \&. as pile \eh'\en[pile-wd]u'\e" \&.. \&.ec .ft .RE .fi . .P Another complication is the fact that some glyphs which represent bracket pieces in original troff can be used for other mathematical symbols also, for example `lf' and `rf' which provide the `floor' operator. . Other devices (most notably for DVI output) don't unify such glyphs. . For this reason, the four glyphs `lf', `rf', `lc', and `rc' are not unified with similarly looking bracket pieces. . In .BR groff , only glyphs with long names are guaranteed to pile up correctly for all devices (provided those glyphs exist). . .P .TS expand; l l l l l. 
Output Input PostScript Unicode Notes _ \[lB] \e[lB] bracketleft u005B \[rB] \e[rB] bracketright u005D \[lC] \e[lC] braceleft u007B \[rC] \e[rC] braceright u007D \[la] \e[la] angleleft u27E8 left angle bracket \[ra] \e[ra] angleright u27E9 right angle bracket \[bv] \e[bv] braceex u23AA vertical extension *** + \[br] \e[braceex] braceex u23AA \[br] \e[bracketlefttp] bracketlefttp u23A1 \[br] \e[bracketleftbt] bracketleftbt u23A3 \[br] \e[bracketleftex] bracketleftex u23A2 \[br] \e[bracketrighttp] bracketrighttp u23A4 \[br] \e[bracketrightbt] bracketrightbt u23A6 \[br] \e[bracketrightex] bracketrightex u23A5 \[lt] \e[lt] bracelefttp u23A7 + \[br] \e[bracelefttp] bracelefttp u23A7 \[lk] \e[lk] braceleftmid u23A8 + \[br] \e[braceleftmid] braceleftmid u23A8 \[lb] \e[lb] braceleftbt u23A9 + \[br] \e[braceleftbt] braceleftbt u23A9 \[br] \e[braceleftex] braceleftex u23AA \[rt] \e[rt] bracerighttp u23AB + \[br] \e[bracerighttp] bracerighttp u23AB \[rk] \e[rk] bracerightmid u23AC + \[bracerightmid] \e[bracerightmid] bracerightmid u23AC \[rb] \e[rb] bracerightbt u23AD + \[bracerightbt] \e[bracerightbt] bracerightbt u23AD \[bracerightex] \e[bracerightex] bracerightex u23AA . \[parenlefttp] \e[parenlefttp] parenlefttp u239B \[parenleftbt] \e[parenleftbt] parenleftbt u239D \[parenleftex] \e[parenleftex] parenleftex u239C \[parenrighttp] \e[parenrighttp] parenrighttp u239E \[parenrightbt] \e[parenrightbt] parenrightbt u23A0 \[parenrightex] \e[parenrightex] parenrightex u239F .TE . .P .I Arrows .P .TS expand; l l l l l. Output Input PostScript Unicode Notes _ \[<-] \e[<-] arrowleft u2190 + \[->] \e[->] arrowright u2192 + \[<>] \e[<>] arrowboth u2194 (horizontal) \[da] \e[da] arrowdown u2193 + \[ua] \e[ua] arrowup u2191 + \[va] \e[va] arrowupdn u2195 \[lA] \e[lA] arrowdblleft u21D0 \[rA] \e[rA] arrowdblright u21D2 \[hA] \e[hA] arrowdblboth u21D4 (horizontal) \[dA] \e[dA] arrowdbldown u21D3 \[uA] \e[uA] arrowdblup u21D1 \[vA] \e[vA] uni21D5 u21D5 vertical double-headed double arrow \[an] \e[an] arrowhorizex u23AF horizontal arrow extension .TE . .P .I Lines .P The font-invariant glyphs `br', `ul', and `rn' form corners; they can be used to build boxes. . Note that both the PostScript and the Unicode-derived names of these three glyphs are just rough approximations. . .P `rn' also serves in classical troff as the horizontal extension of the square root sign. . .P `ru' is a font-invariant glyph, namely a rule of length 0.5m. . .P .TS expand; l l l l l. Output Input PostScript Unicode Notes _ \[ba] \e[ba] bar u007C \[br] \e[br] SF110000 u2502 box rule + \[ul] \e[ul] underscore u005F + \[rn] \e[rn] overline u203E + \[ru] \e[ru] --- --- baseline rule + \[bb] \e[bb] brokenbar u00A6 \[sl] \e[sl] slash u002F + \[rs] \e[rs] backslash u005C reverse solidus .TE .P Use `\f(CW\e[radicalex]\fP', not `\f(CW\e[overline]\fP', for continuation of square root . .P .I Text markers .P .TS expand; l l l l l. Output Input PostScript Unicode Notes _ \[ci] \e[ci] circle u25CB + \[bu] \e[bu] bullet u2022 + \[dd] \e[dd] daggerdbl u2021 double dagger sign + \[dg] \e[dg] dagger u2020 + \[lz] \e[lz] lozenge u25CA \[sq] \e[sq] uni25A1 u25A1 white square + \[ps] \e[ps] paragraph u00B6 \[sc] \e[sc] section u00A7 + \[lh] \e[lh] uni261C u261C hand pointing left + \[rh] \e[rh] a14 u261E hand pointing right + \[at] \e[at] at u0040 \[sh] \e[sh] numbersign u0023 \[CR] \e[CR] carriagereturn u21B5 \[OK] \e[OK] a19 u2713 check mark, tick .TE . .P .I Legal Symbols .P .TS expand; l l l l l. 
Output Input PostScript Unicode Notes _ \[co] \e[co] copyright u00A9 + \[rg] \e[rg] registered u00AE + \[tm] \e[tm] trademark u2122 \[bs] \e[bs] --- --- AT&T Bell Labs logo + .TE .P The Bell Labs logo is not supported in groff. . .P .I Currency symbols .P .TS expand; l l l l l. Output Input PostScript Unicode Notes _ \[Do] \e[Do] dollar u0024 \[ct] \e[ct] cent u00A2 + \[eu] \e[eu] --- u20AC official Euro symbol \[Eu] \e[Eu] Euro u20AC font-specific Euro glyph variant \[Ye] \e[Ye] yen u00A5 \[Po] \e[Po] sterling u00A3 British currency sign \[Cs] \e[Cs] currency u00A4 Scandinavian currency sign \[Fn] \e[Fn] florin u0192 Dutch currency sign .TE . .P .I Units .P .TS expand; l l l l l. Output Input PostScript Unicode Notes _ \[de] \e[de] degree u00B0 + \[%0] \e[%0] perthousand u2030 per thousand, per mille sign \[fm] \e[fm] minute u2032 footmark, prime + \[sd] \e[sd] second u2033 \[mc] \e[mc] mu u00B5 micro sign \[Of] \e[Of] ordfeminine u00AA \[Om] \e[Om] ordmasculine u00BA .TE . .P .I Logical Symbols .P .TS expand; l l l l l. Output Input PostScript Unicode Notes _ \[AN] \e[AN] logicaland u2227 \[OR] \e[OR] logicalor u2228 \[no] \e[no] logicalnot u00AC + \[tno] \e[tno] logicalnot u00AC text variant of `no' \[te] \e[te] existential u2203 there exists \[fa] \e[fa] universal u2200 for all \[st] \e[st] suchthat u220B \[3d] \e[3d] therefore u2234 \[tf] \e[tf] therefore u2234 .TE . .P .I Mathematical Symbols .P .TS expand; l l l l l. Output Input PostScript Unicode Notes _ \[12] \e[12] onehalf u00BD "+" \[14] \e[14] onequarter u00BC "+" \[34] \e[34] threequarters u00BE "+" \[18] \e[18] oneeighth u215B \[38] \e[38] threeeighths u215C \[58] \e[58] fiveeighths u215D \[78] \e[78] seveneighths u215E \[S1] \e[S1] onesuperior u00B9 \[S2] \e[S2] twosuperior u00B2 \[S3] \e[S3] threesuperior u00B3 \[pl] \e[pl] plus u002B plus in special font + \[mi] \e[mi] minus u2212 minus in special font + \[-+] \e[-+] uni2213 u2213 \[+-] \e[+-] plusminus u00B1 + \[t+-] \e[t+-] plusminus u00B1 text variant of `+\-' \[pc] \e[pc] periodcentered u00B7 \[md] \e[md] dotmath u22C5 multiplication dot \[mu] \e[mu] multiply u00D7 + \[tm] \e[tmu] multiply u00D7 text variant of `mu' \[c*] \e[c*] circlemultiply u2297 multiply sign in a circle \[c+] \e[c+] circleplus u2295 plus in a circle \[di] \e[di] divide u00F7 division + \[tdi] \e[tdi] divide u00F7 text variant of `di' \[f/] \e[f/] fraction u2044 bar for fractions \[**] \e[**] asteriskmath u2217 + \[<=] \e[<=] lessequal u2264 + \[>=] \e[>=] greaterequal u2265 + \[<<] \e[<<] uni226A u226A much less \[>>] \e[>>] uni226B u226B much greater \[eq] \e[eq] equal u003D equals in special font + \[!=] \e[!=] notequal u003D_0338 + \[==] \e[==] equivalence u2261 + \[ne] \e[ne] uni2262 u2261_0338 \[=~] \e[=~] congruent u2245 approx.\& equal \[|=] \e[|=] uni2243 u2243 asymptot.\& equal to + \[ap] \e[ap] similar u223C + \[~~] \e[~~] approxequal u2248 almost equal to \[~=] \e[~=] approxequal u2248 \[pt] \e[pt] proportional u221D + \[es] \e[es] emptyset u2205 + \[mo] \e[mo] element u2208 + \[nm] \e[nm] notelement u2208_0338 \[sb] \e[sb] propersubset u2282 + \[nb] \e[nb] notsubset u2282_0338 \[sp] \e[sp] propersuperset u2283 + \[nc] \e[nc] uni2285 u2283_0338 not superset \[ib] \e[ib] reflexsubset u2286 + \[ip] \e[ip] reflexsuperset u2287 + \[ca] \e[ca] intersection u2229 intersection, cap + \[cu] \e[cu] union u222A union, cup + \[/_] \e[/_] angle u2220 \[pp] \e[pp] perpendicular u22A5 \[is] \e[is] integral u222B + \[integral] \e[integral] integral u222B *** \[sum] \e[sum] summation u2211 *** 
\[product] \e[product] product u220F *** \[coproduct] \e[coproduct] uni2210 u2210 *** \[gr] \e[gr] gradient u2207 + \[sr] \e[sr] radical u221A square root + \[sq] \e[sqrt] radical u221A *** \[radicalex] \e[radicalex] radicalex --- square root continuation \[sqrtex] \e[sqrtex] radicalex --- *** \[lc] \e[lc] uni2308 u2308 left ceiling + \[rc] \e[rc] uni2309 u2309 right ceiling + \[lf] \e[lf] uni230A u230A left floor + \[rf] \e[rf] uni230B u230B right floor + \[if] \e[if] infinity u221E + \[Ah] \e[Ah] aleph u2135 \[Im] \e[Im] Ifraktur u2111 Gothic I, imaginary \[Re] \e[Re] Rfraktur u211C Gothic R, real \[wp] \e[wp] weierstrass u2118 Weierstrass p \[pd] \e[pd] partialdiff u2202 partial differentiation + \[-h] \e[-h] uni210F u210F Planck constant / 2pi \[hbar] \e[hbar] uni210F u210F .TE . .P .I Greek glyphs .P These glyphs are intended for technical use, not for real Greek; normally, the uppercase letters have upright shape, and the lowercase ones are slanted. . There is a problem with the mapping of letter phi to Unicode. . Prior to Unicode version\ 3.0, the difference between U+03C6, GREEK SMALL LETTER PHI, and U+03D5, GREEK PHI SYMBOL, was not clearly described; only the glyph shapes in the Unicode book could be used as a reference. . Starting with Unicode\ 3.0, the reference glyphs have been exchanged and described verbally also: In mathematical context, U+03D5 is the stroked variant and U+03C6 the curly glyph. . Unfortunately, most font vendors didn't update their fonts to this (incompatible) change in Unicode. . At the time of this writing (January 2006), it is not clear yet whether the Adobe Glyph Names `phi' and `phi1' also change its meaning if used for mathematics, thus compatibility problems are likely to happen \(en being conservative, groff currently assumes that `phi' in a PostScript symbol font is the stroked version. .P In groff, symbol `\f(CW\e[*f]\fP' always denotes the stroked version of phi, and `\f(CW\e[+f]\fP' the curly variant. .P .TS expand; l l l l l. 
Output Input PostScript Unicode Notes _ \[*A] \e[*A] Alpha u0391 + \[*B] \e[*B] Beta u0392 + \[*G] \e[*G] Gamma u0393 + \[*D] \e[*D] Delta u0394 + \[*E] \e[*E] Epsilon u0395 + \[*Z] \e[*Z] Zeta u0396 + \[*Y] \e[*Y] Eta u0397 + \[*H] \e[*H] Theta u0398 + \[*I] \e[*I] Iota u0399 + \[*K] \e[*K] Kappa u039A + \[*L] \e[*L] Lambda u039B + \[*M] \e[*M] Mu u039C + \[*N] \e[*N] Nu u039D + \[*C] \e[*C] Xi u039E + \[*O] \e[*O] Omicron u039F + \[*P] \e[*P] Pi u03A0 + \[*R] \e[*R] Rho u03A1 + \[*S] \e[*S] Sigma u03A3 + \[*T] \e[*T] Tau u03A4 + \[*U] \e[*U] Upsilon u03A5 + \[*F] \e[*F] Phi u03A6 + \[*X] \e[*X] Chi u03A7 + \[*Q] \e[*Q] Psi u03A8 + \[*W] \e[*W] Omega u03A9 + \[*a] \e[*a] alpha u03B1 + \[*b] \e[*b] beta u03B2 + \[*g] \e[*g] gamma u03B3 + \[*d] \e[*d] delta u03B4 + \[*e] \e[*e] epsilon u03B5 + \[*z] \e[*z] zeta u03B6 + \[*y] \e[*y] eta u03B7 + \[*h] \e[*h] theta u03B8 + \[*i] \e[*i] iota u03B9 + \[*k] \e[*k] kappa u03BA + \[*l] \e[*l] lambda u03BB + \[*m] \e[*m] mu u03BC + \[*n] \e[*n] nu u03BD + \[*c] \e[*c] xi u03BE + \[*o] \e[*o] omicron u03BF + \[*p] \e[*p] pi u03C0 + \[*r] \e[*r] rho u03C1 + \[ts] \e[ts] sigma1 u03C2 terminal sigma + \[*s] \e[*s] sigma u03C3 + \[*t] \e[*t] tau u03C4 + \[*u] \e[*u] upsilon u03C5 + \[*f] \e[*f] phi u03D5 (stroked glyph) + \[*x] \e[*x] chi u03C7 + \[*q] \e[*q] psi u03C8 + \[*w] \e[*w] omega u03C9 + \[+h] \e[+h] theta1 u03D1 variant theta \[+f] \e[+f] phi1 u03C6 variant phi (curly shape) \[+p] \e[+p] omega1 u03D6 variant pi, looking like omega \[+e] \e[+e] uni03F5 u03F5 variant epsilon .TE . .P .I Card symbols .P .TS expand; l l l l l. Output Input PostScript Unicode Notes _ \[CL] \e[CL] club u2663 black club suit \[SP] \e[SP] spade u2660 black spade suit \[HE] \e[HE] heart u2665 black heart suit \[u2661] \e[u2661] uni2661 u2661 white heart suit \[DI] \e[DI] diamond u2666 black diamond suit \[u2662] \e[u2662] uni2662 u2662 white diamond suit .TE . . .\" -------------------------------------------------------------------- .SH "AUTHOR" .\" -------------------------------------------------------------------- . Copyright \(co 1989-2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009 Free Software Foundation, Inc. . .P This document is distributed under the terms of the FDL (GNU Free Documentation License) version 1.3 or later. . You should have received a copy of the FDL on your system, it is also available on-line at the .UR http://\:www.gnu.org/\:copyleft/\:fdl.html GNU copyleft site .UE . . .P This document is part of .IR groff , the GNU roff distribution. . It was written by .MT jjc@jclark.com James Clark .ME with additions by .MT wl@gnu.org Werner Lemberg .ME and .MT bwarken@mayn.de Bernd Warken .ME . . . .\" -------------------------------------------------------------------- .SH "SEE ALSO" .\" -------------------------------------------------------------------- . .TP .BR groff (1) the GNU roff formatter . .TP .BR groff (7) a short reference of the groff formatting language . . .P .IR "An extension to the troff character set for Europe" , E.G. Keizer, K.J. Simonsen, J. Akkerhuis; EUUG Newsletter, Volume 9, No. 2, Summer 1989 . . .P .UR http://\:www.unicode.org The Unicode Standard .UE . .cp \n[groff_char_C] . 
.\" -------------------------------------------------------------------- .\" Emacs settings .\" -------------------------------------------------------------------- .\" Local Variables: .\" mode: nroff .\" End: doclifter-2.11/tests/Makefile0000664000175000017500000000107212152465736014373 0ustar esresr# Regression tests for doclifter TESTLOADS := $(shell ls *.man | sed '/.man/s///') test: regress @echo "No output (other than a testfile stem name) is good news." rebuild: @for file in $(TESTLOADS); do \ echo "Remaking $${file}.chk"; \ ../doclifter <$${file}.man >$${file}.chk 2>&1; \ done regress: @for file in $(TESTLOADS); do \ echo $${file}; \ if ../doclifter <$${file}.man >/tmp/regress$$; \ then diff -u $${file}.chk /tmp/regress$$; \ else echo "*** Nonzero return status on $${file}!"; exit 1; fi \ done @rm -f /tmp/regress doclifter-2.11/tests/console_ioctl.chk0000664000175000017500000010622512152465736016264 0ustar esresr 2009-02-28 CONSOLE_IOCTL 4 2009-02-28 Linux Linux Programmer's Manual console_ioctl ioctl's for console terminal and virtual consoles DESCRIPTION The following Linux-specific ioctl2 requests are supported. Each requires a third argument, assumed here to be argp. KDGETLED Get state of LEDs. argp points to a char. The lower three bits of *argp are set to the state of the LEDs, as follows: LED_CAP 0x04 caps lock led LEC_NUM 0x02 num lock led LED_SCR 0x01 scroll lock led KDSETLED Set the LEDs. The LEDs are set to correspond to the lower three bits of argp. However, if a higher order bit is set, the LEDs revert to normal: displaying the state of the keyboard functions of caps lock, num lock, and scroll lock. Before 1.1.54, the LEDs just reflected the state of the corresponding keyboard flags, and KDGETLED/KDSETLED would also change the keyboard flags. Since 1.1.54 the leds can be made to display arbitrary information, but by default they display the keyboard flags. The following two ioctl's are used to access the keyboard flags. KDGKBLED Get keyboard flags CapsLock, NumLock, ScrollLock (not lights). argp points to a char which is set to the flag state. The low order three bits (mask 0x7) get the current flag state, and the low order bits of the next nibble (mask 0x70) get the default flag state. (Since 1.1.54.) KDSKBLED Set keyboard flags CapsLock, NumLock, ScrollLock (not lights). argp has the desired flag state. The low order three bits (mask 0x7) have the flag state, and the low order bits of the next nibble (mask 0x70) have the default flag state. (Since 1.1.54.) KDGKBTYPE Get keyboard type. This returns the value KB_101, defined as 0x02. KDADDIO Add I/O port as valid. Equivalent to ioperm(arg,1,1). KDDELIO Delete I/O port as valid. Equivalent to ioperm(arg,1,0). KDENABIO Enable I/O to video board. Equivalent to ioperm(0x3b4, 0x3df-0x3b4+1, 1). KDDISABIO Disable I/O to video board. Equivalent to ioperm(0x3b4, 0x3df-0x3b4+1, 0). KDSETMODE Set text/graphics mode. argp is one of these: KD_TEXT 0x00 KD_GRAPHICS 0x01 KDGETMODE Get text/graphics mode. argp points to a long which is set to one of the above values. KDMKTONE Generate tone of specified length. The lower 16 bits of argp specify the period in clock cycles, and the upper 16 bits give the duration in msec. If the duration is zero, the sound is turned off. Control returns immediately. For example, argp = (125<<16) + 0x637 would specify the beep normally associated with a ctrl-G. (Thus since 0.99pl1; broken in 2.1.49-50.) KIOCSOUND Start or stop sound generation. 
The lower 16 bits of argp specify the period in clock cycles (that is, argp = 1193180/frequency). argp = 0 turns sound off. In either case, control returns immediately. GIO_CMAP Get the current default color map from kernel. argp points to a 48-byte array. (Since 1.3.3.) PIO_CMAP Change the default text-mode color map. argp points to a 48-byte array which contains, in order, the Red, Green, and Blue values for the 16 available screen colors: 0 is off, and 255 is full intensity. The default colors are, in order: black, dark red, dark green, brown, dark blue, dark purple, dark cyan, light grey, dark grey, bright red, bright green, yellow, bright blue, bright purple, bright cyan and white. (Since 1.3.3.) GIO_FONT Gets 256-character screen font in expanded form. argp points to an 8192 byte array. Fails with error code EINVAL if the currently loaded font is a 512-character font, or if the console is not in text mode. GIO_FONTX Gets screen font and associated information. argp points to a struct consolefontdesc (see PIO_FONTX). On call, the charcount field should be set to the maximum number of characters that would fit in the buffer pointed to by chardata. On return, the charcount and charheight are filled with the respective data for the currently loaded font, and the chardata array contains the font data if the initial value of charcount indicated enough space was available; otherwise the buffer is untouched and errno is set to ENOMEM. (Since 1.3.1.) PIO_FONT Sets 256-character screen font. Load font into the EGA/VGA character generator. argp points to a 8192 byte map, with 32 bytes per character. Only first N of them are used for an 8xN font (0 < N <= 32). This call also invalidates the Unicode mapping. PIO_FONTX Sets screen font and associated rendering information. argp points to a struct consolefontdesc { unsigned short charcount; /* characters in font (256 or 512) */ unsigned short charheight; /* scan lines per character (1-32) */ char *chardata; /* font data in expanded form */ }; If necessary, the screen will be appropriately resized, and SIGWINCH sent to the appropriate processes. This call also invalidates the Unicode mapping. (Since 1.3.1.) PIO_FONTRESET Resets the screen font, size and Unicode mapping to the bootup defaults. argp is unused, but should be set to NULL to ensure compatibility with future versions of Linux. (Since 1.3.28.) GIO_SCRNMAP Get screen mapping from kernel. argp points to an area of size E_TABSZ, which is loaded with the font positions used to display each character. This call is likely to return useless information if the currently loaded font is more than 256 characters. GIO_UNISCRNMAP Get full Unicode screen mapping from kernel. argp points to an area of size E_TABSZ*sizeof(unsigned short), which is loaded with the Unicodes each character represent. A special set of Unicodes, starting at U+F000, are used to represent "direct to font" mappings. (Since 1.3.1.) PIO_SCRNMAP Loads the "user definable" (fourth) table in the kernel which maps bytes into console screen symbols. argp points to an area of size E_TABSZ. PIO_UNISCRNMAP Loads the "user definable" (fourth) table in the kernel which maps bytes into Unicodes, which are then translated into screen symbols according to the currently loaded Unicode-to-font map. Special Unicodes starting at U+F000 can be used to map directly to the font symbols. (Since 1.3.1.) GIO_UNIMAP Get Unicode-to-font mapping from kernel. 
argp points to a struct unimapdesc { unsigned short entry_ct; struct unipair *entries; }; where entries points to an array of struct unipair { unsigned short unicode; unsigned short fontpos; }; (Since 1.1.92.) PIO_UNIMAP Put unicode-to-font mapping in kernel. argp points to a struct unimapdesc. (Since 1.1.92) PIO_UNIMAPCLR Clear table, possibly advise hash algorithm. argp points to a struct unimapinit { unsigned short advised_hashsize; /* 0 if no opinion */ unsigned short advised_hashstep; /* 0 if no opinion */ unsigned short advised_hashlevel; /* 0 if no opinion */ }; (Since 1.1.92.) KDGKBMODE Gets current keyboard mode. argp points to a long which is set to one of these: K_RAW 0x00 K_XLATE 0x01 K_MEDIUMRAW 0x02 K_UNICODE 0x03 KDSKBMODE Sets current keyboard mode. argp is a long equal to one of the above values. KDGKBMETA Gets meta key handling mode. argp points to a long which is set to one of these: K_METABIT 0x03 set high order bit K_ESCPREFIX 0x04 escape prefix KDSKBMETA Sets meta key handling mode. argp is a long equal to one of the above values. KDGKBENT Gets one entry in key translation table (keycode to action code). argp points to a struct kbentry { unsigned char kb_table; unsigned char kb_index; unsigned short kb_value; }; with the first two members filled in: kb_table selects the key table (0 <= kb_table < MAX_NR_KEYMAPS), and kb_index is the keycode (0 <= kb_index < NR_KEYS). kb_value is set to the corresponding action code, or K_HOLE if there is no such key, or K_NOSUCHMAP if kb_table is invalid. KDSKBENT Sets one entry in translation table. argp points to a struct kbentry. KDGKBSENT Gets one function key string. argp points to a struct kbsentry { unsigned char kb_func; unsigned char kb_string[512]; }; kb_string is set to the (null-terminated) string corresponding to the kb_functh function key action code. KDSKBSENT Sets one function key string entry. argp points to a struct kbsentry. KDGKBDIACR Read kernel accent table. argp points to a struct kbdiacrs { unsigned int kb_cnt; struct kbdiacr kbdiacr[256]; }; where kb_cnt is the number of entries in the array, each of which is a struct kbdiacr { unsigned char diacr; unsigned char base; unsigned char result; }; KDGETKEYCODE Read kernel keycode table entry (scan code to keycode). argp points to a struct kbkeycode { unsigned int scancode; unsigned int keycode; }; keycode is set to correspond to the given scancode. (89 <= scancode <= 255 only. For 1 <= scancode <= 88, keycode==scancode.) (Since 1.1.63.) KDSETKEYCODE Write kernel keycode table entry. argp points to a struct kbkeycode. (Since 1.1.63.) KDSIGACCEPT The calling process indicates its willingness to accept the signal argp when it is generated by pressing an appropriate key combination. (1 <= argp <= NSIG). (See spawn_console() in linux/drivers/char/keyboard.c.) VT_OPENQRY Returns the first available (non-opened) console. argp points to an int which is set to the number of the vt (1 <= *argp <= MAX_NR_CONSOLES). VT_GETMODE Get mode of active vt. argp points to a struct vt_mode { char mode; /* vt mode */ char waitv; /* if set, hang on writes if not active */ short relsig; /* signal to raise on release req */ short acqsig; /* signal to raise on acquisition */ short frsig; /* unused (set to 0) */ }; which is set to the mode of the active vt. mode is set to one of these values: VT_AUTO auto vt switching VT_PROCESS process controls switching VT_ACKACQ acknowledge switch VT_SETMODE Set mode of active vt. argp points to a struct vt_mode. VT_GETSTATE Get global vt state info. 
argp points to a struct vt_stat { unsigned short v_active; /* active vt */ unsigned short v_signal; /* signal to send */ unsigned short v_state; /* vt bit mask */ }; For each vt in use, the corresponding bit in the v_state member is set. (Kernels 1.0 through 1.1.92.) VT_RELDISP Release a display. VT_ACTIVATE Switch to vt argp (1 <= argp <= MAX_NR_CONSOLES). VT_WAITACTIVE Wait until vt argp has been activated. VT_DISALLOCATE Deallocate the memory associated with vt argp. (Since 1.1.54.) VT_RESIZE Set the kernel's idea of screensize. argp points to a struct vt_sizes { unsigned short v_rows; /* # rows */ unsigned short v_cols; /* # columns */ unsigned short v_scrollsize; /* no longer used */ }; Note that this does not change the videomode. See resizecons8. (Since 1.1.54.) VT_RESIZEX Set the kernel's idea of various screen parameters. argp points to a struct vt_consize { unsigned short v_rows; /* number of rows */ unsigned short v_cols; /* number of columns */ unsigned short v_vlin; /* number of pixel rows on screen */ unsigned short v_clin; /* number of pixel rows per character */ unsigned short v_vcol; /* number of pixel columns on screen */ unsigned short v_ccol; /* number of pixel columns per character */ }; Any parameter may be set to zero, indicating "no change", but if multiple parameters are set, they must be self-consistent. Note that this does not change the videomode. See resizecons8. (Since 1.3.3.) The action of the following ioctls depends on the first byte in the struct pointed to by argp, referred to here as the subcode. These are legal only for the superuser or the owner of the current tty. TIOCLINUX, subcode=0 Dump the screen. Disappeared in 1.1.92. (With kernel 1.1.92 or later, read from /dev/vcsN or /dev/vcsaN instead.) TIOCLINUX, subcode=1 Get task information. Disappeared in 1.1.92. TIOCLINUX, subcode=2 Set selection. argp points to a struct { char subcode; short xs, ys, xe, ye; short sel_mode; }; xs and ys are the starting column and row. xe and ye are the ending column and row. (Upper left corner is row=column=1.) sel_mode is 0 for character-by-character selection, 1 for word-by-word selection, or 2 for line-by-line selection. The indicated screen characters are highlighted and saved in the static array sel_buffer in devices/char/console.c. TIOCLINUX, subcode=3 Paste selection. The characters in the selection buffer are written to fd. TIOCLINUX, subcode=4 Unblank the screen. TIOCLINUX, subcode=5 Sets contents of a 256-bit look up table defining characters in a "word", for word-by-word selection. (Since 1.1.32.) TIOCLINUX, subcode=6 argp points to a char which is set to the value of the kernel variable shift_state. (Since 1.1.32.) TIOCLINUX, subcode=7 argp points to a char which is set to the value of the kernel variable report_mouse. (Since 1.1.33.) TIOCLINUX, subcode=8 Dump screen width and height, cursor position, and all the character-attribute pairs. (Kernels 1.1.67 through 1.1.91 only. With kernel 1.1.92 or later, read from /dev/vcsa* instead.) TIOCLINUX, subcode=9 Restore screen width and height, cursor position, and all the character-attribute pairs. (Kernels 1.1.67 through 1.1.91 only. With kernel 1.1.92 or later, write to /dev/vcsa* instead.) TIOCLINUX, subcode=10 Handles the Power Saving feature of the new generation of monitors. VESA screen blanking mode is set to argp[1], which governs what screen blanking does: 0: Screen blanking is disabled. 
1: The current video adapter register settings are saved, then the controller is programmed to turn off the vertical synchronization pulses. This puts the monitor into "standby" mode. If your monitor has an Off_Mode timer, then it will eventually power down by itself. 2: The current settings are saved, then both the vertical and horizontal synchronization pulses are turned off. This puts the monitor into "off" mode. If your monitor has no Off_Mode timer, or if you want your monitor to power down immediately when the blank_timer times out, then you choose this option. (Caution: Powering down frequently will damage the monitor.) (Since 1.1.76.) RETURN VALUE On success, 0 is returned. On error -1 is returned, and errno is set. ERRORS errno may take on these values: EBADF The file descriptor is invalid. ENOTTY The file descriptor is not associated with a character special device, or the specified request does not apply to it. EINVAL The file descriptor or argp is invalid. EPERM Insufficient permission. NOTES Warning: Do not regard this man page as documentation of the Linux console ioctl's. This is provided for the curious only, as an alternative to reading the source. Ioctl's are undocumented Linux internals, liable to be changed without warning. (And indeed, this page more or less describes the situation as of kernel version 1.1.94; there are many minor and not-so-minor differences with earlier versions.) Very often, ioctl's are introduced for communication between the kernel and one particular well-known program (fdisk, hdparm, setserial, tunelp, loadkeys, selection, setfont, etc.), and their behavior will be changed when required by this particular program. Programs using these ioctl's will not be portable to other versions of UNIX, will not work on older versions of Linux, and will not work on future versions of Linux. Use POSIX functions. SEE ALSO dumpkeys1, kbd_mode1, loadkeys1, mknod1, setleds1, setmetamode1, execve2, fcntl2, ioperm2, termios3, console4, console_codes4, mt4, sd4, tty4, tty_ioctl4, ttyS4, vcs4, vcsa4, charsets7, mapscrn8, resizecons8, setfont8, /usr/include/linux/kd.h, /usr/include/linux/vt.h COLOPHON This page is part of release 3.35 of the Linux man-pages project. A description of the project, and information about reporting bugs, can be found at http://man7.org/linux/man-pages/. doclifter-2.11/tests/README0000664000175000017500000000143112152465736013612 0ustar esresrWhat the various test loads are for: basic.troff Test translation of some low-level troff idioms. capabilities.man: Tests lists nested within .RS. console_ioctl.man Tests table and display processing. corosync.conf.man Tests recognition of a bare filename in the Synopsis section docliftertest1.man General test for many features, including section and paragraph recognition and highlight mapping. grap.man Tests lists nested within .Bd/.Ed. groff_char.man: Tests recognition of every special chracter groff knows about. pax.man Test the kluge to avoid excess font closes in table entries. stringwidth.man Test a common evaluation case of the \w macro. xoxc.man Tests translation of .Xo/.Xc construct in BSD macros. sudoers.man: Tests the filename case in .Bl -literal. doclifter-2.11/tests/sudoers.man0000664000175000017500000000073312152465736015117 0ustar esresr.\" Seriously reduced subsection of sudoers.5 to test .Bl -literal .Dd July 16, 2012 .Dt SUDOERS 5 .Os Sudo 1.8.6p3 .Sh NAME .Nm sudoers .Nd default sudo security policy module .Sh DESCRIPTION .Pp Would match any file name beginning with a letter. 
.Pp Note that a forward slash .Pq Ql / will .Sy not be matched by wildcards used in the path name. This is to make a path like: .Bd -literal -offset 4n /usr/bin/* .Ed .Pp match .Pa /usr/bin/who but not .Pa /usr/bin/X11/xterm . doclifter-2.11/tests/sudoers.chk0000664000175000017500000000207212152465736015107 0ustar esresr SUDOERS 5 sudoers default sudo security policy module DESCRIPTION Would match any file name beginning with a letter. Note that a forward slash (Ql /) will not be matched by wildcards used in the path name. This is to make a path like: /usr/bin/* match /usr/bin/who but not /usr/bin/X11/xterm. doclifter-2.11/tests/corosync.conf.chk0000664000175000017500000007200112152465736016205 0ustar esresr 2006-03-28 COROSYNC_CONF 5 2006-03-28 corosync Man Page Corosync Cluster Engine Programmer's Manual corosync.conf corosync executive configuration file /etc/corosync.conf DESCRIPTION The corosync.conf instructs the corosync executive about various parameters needed to control the corosync executive. Empty lines and lines starting with # character are ignored. The configuration file consists of bracketed top level directives. The possible directive choices are: totem { } This top level directive contains configuration options for the totem protocol. logging { } This top level directive contains configuration options for logging. event { } This top level directive contains configuration options for the event service. It is also possible to specify the top level parameter compatibility. This directive indicates the level of compatibility requested by the user. The option whitetank can be specified to remain backward compatable with openais-0.80.z. The option none can be specified to only be compatable with corosync-1.Y.Z. Extra processing during configuration changes is required to remain backward compatable. The default is whitetank. (backwards compatibility) Within the totem directive, an interface directive is required. There is also one configuration option which is required: Within the interface sub-directive of totem there are four parameters which are required. There is one parameter which is optional. ringnumber This specifies the ring number for the interface. When using the redundant ring protocol, each interface should specify separate ring numbers to uniquely identify to the membership protocol which interface to use for which redundant ring. The ringnumber must start at 0. bindnetaddr This specifies the network address the corosync executive should bind to. For example, if the local interface is 192.168.5.92 with netmask 255.255.255.0, set bindnetaddr to 192.168.5.0. If the local interface is 192.168.5.92 with netmask 255.255.255.192, set bindnetaddr to 192.168.5.64, and so forth. This may also be an IPV6 address, in which case IPV6 networking will be used. In this case, the full address must be specified and there is no automatic selection of the network interface within a specific subnet as with IPv4. If IPv6 networking is used, the nodeid field must be specified. broadcast This is optional and can be set to yes. If it is set to yes, the broadcast address will be used for communication. If this option is set, mcastaddr should not be set. mcastaddr This is the multicast address used by corosync executive. The default should work for most networks, but the network administrator should be queried about a multicast address to use. Avoid 224.x.x.x because this is a "config" multicast address. This may also be an IPV6 multicast address, in which case IPV6 networking will be used. 
If IPv6 networking is used, the nodeid field must be specified. mcastport This specifies the UDP port number. It is possible to use the same multicast address on a network with the corosync services configured for different UDP ports. Please note corosync uses two UDP ports mcastport (for mcast receives) and mcastport - 1 (for mcast sends). If you have multiple clusters on the same network using the same mcastaddr please configure the mcastports with a gap. ttl This specifies the Time To Live (TTL). If you run your cluster on a routed network then the default of "1" will be too small. This option provides a way to increase this up to 255. The valid range is 0..255. Note that this is only valid on multicast transport types. member This specifies a member on the interface and used with the udpu transport only. Every node that should be a member of the membership should be specified as a separate member directive. Within the member directive there is a parameter memberaddr which specifies the ip address of one of the nodes. Within the totem directive, there are seven configuration options of which one is required, five are optional, and one is required when IPV6 is configured in the interface subdirective. The required directive controls the version of the totem configuration. The optional option unless using IPV6 directive controls identification of the processor. The optional options control secrecy and authentication, the redundant ring mode of operation, maximum network MTU, and number of sending threads, and the nodeid field. version This specifies the version of the configuration file. Currently the only valid version for this directive is 2. nodeid This configuration option is optional when using IPv4 and required when using IPv6. This is a 32 bit value specifying the node identifier delivered to the cluster membership service. If this is not specified with IPv4, the node id will be determined from the 32 bit IP address the system to which the system is bound with ring identifier of 0. The node identifier value of zero is reserved and should not be used. clear_node_high_bit This configuration option is optional and is only relevant when no nodeid is specified. Some openais clients require a signed 32 bit nodeid that is greater than zero however by default openais uses all 32 bits of the IPv4 address space when generating a nodeid. Set this option to yes to force the high bit to be zero and therefor ensure the nodeid is a positive signed 32 bit integer. WARNING: The clusters behavior is undefined if this option is enabled on only a subset of the cluster (for example during a rolling upgrade). secauth This specifies that HMAC/SHA1 authentication should be used to authenticate all messages. It further specifies that all data should be encrypted with the sober128 encryption algorithm to protect data from eavesdropping. Enabling this option adds a 36 byte header to every message sent by totem which reduces total throughput. Encryption and authentication consume 75% of CPU cycles in aisexec as measured with gprof when enabled. For 100mbit networks with 1500 MTU frame transmissions: A throughput of 9mb/sec is possible with 100% cpu utilization when this option is enabled on 3ghz cpus. A throughput of 10mb/sec is possible wth 20% cpu utilization when this optin is disabled on 3ghz cpus. For gig-e networks with large frame transmissions: A throughput of 20mb/sec is possible when this option is enabled on 3ghz cpus. A throughput of 60mb/sec is possible when this option is disabled on 3ghz cpus. 
The default is on. rrp_mode This specifies the mode of redundant ring, which may be none, active, or passive. Active replication offers slightly lower latency from transmit to delivery in faulty network environments but with less performance. Passive replication may nearly double the speed of the totem protocol if the protocol doesn't become cpu bound. The final option is none, in which case only one network interface will be used to operate the totem protocol. If only one interface directive is specified, none is automatically chosen. If multiple interface directives are specified, only active or passive may be chosen. netmtu This specifies the network maximum transmit unit. To set this value beyond 1500, the regular frame MTU, requires ethernet devices that support large, or also called jumbo, frames. If any device in the network doesn't support large frames, the protocol will not operate properly. The hosts must also have their mtu size set from 1500 to whatever frame size is specified here. Please note while some NICs or switches claim large frame support, they support 9000 MTU as the maximum frame size including the IP header. Setting the netmtu and host MTUs to 9000 will cause totem to use the full 9000 bytes of the frame. Then Linux will add a 18 byte header moving the full frame size to 9018. As a result some hardware will not operate properly with this size of data. A netmtu of 8982 seems to work for the few large frame devices that have been tested. Some manufacturers claim large frame support when in fact they support frame sizes of 4500 bytes. Increasing the MTU from 1500 to 8982 doubles throughput performance from 30MB/sec to 60MB/sec as measured with evsbench with 175000 byte messages with the secauth directive set to off. When sending multicast traffic, if the network frequently reconfigures, chances are that some device in the network doesn't support large frames. Choose hardware carefully if intending to use large frame support. The default is 1500. threads This directive controls how many threads are used to encrypt and send multicast messages. If secauth is off, the protocol will never use threaded sending. If secauth is on, this directive allows systems to be configured to use multiple threads to encrypt and send multicast messages. A thread directive of 0 indicates that no threaded send should be used. This mode offers best performance for non-SMP systems. The default is 0. vsftype This directive controls the virtual synchrony filter type used to identify a primary component. The preferred choice is YKD dynamic linear voting, however, for clusters larger then 32 nodes YKD consumes alot of memory. For large scale clusters that are created by changing the MAX_PROCESSORS_COUNT #define in the C code totem.h file, the virtual synchrony filter "none" is recommended but then AMF and DLCK services (which are currently experimental) are not safe for use. The default is ykd. The vsftype can also be set to none. transport This directive controls the transport mechanism used. If the interface to which corosync is binding is an RDMA interface such as RoCEE or Infiniband, the "iba" parameter may be specified. To avoid the use of multicast entirely, a unicast transport parameter "udpu" can be specified. This requires specifying the list of members that could potentially make up the membership before deployment. The default is udp. The transport type can also be set to udpu or iba. 
Within the totem directive, there are several configuration options which are used to control the operation of the protocol. It is generally not recommended to change any of these values without proper guidance and sufficient testing. Some networks may require larger values if suffering from frequent reconfigurations. Some applications may require faster failure detection times which can be achieved by reducing the token timeout. token This timeout specifies in milliseconds until a token loss is declared after not receiving a token. This is the time spent detecting a failure of a processor in the current configuration. Reforming a new configuration takes about 50 milliseconds in addition to this timeout. The default is 1000 milliseconds. token_retransmit This timeout specifies in milliseconds after how long before receiving a token the token is retransmitted. This will be automatically calculated if token is modified. It is not recommended to alter this value without guidance from the corosync community. The default is 238 milliseconds. hold This timeout specifies in milliseconds how long the token should be held by the representative when the protocol is under low utilization. It is not recommended to alter this value without guidance from the corosync community. The default is 180 milliseconds. token_retransmits_before_loss_const This value identifies how many token retransmits should be attempted before forming a new configuration. If this value is set, retransmit and hold will be automatically calculated from retransmits_before_loss and token. The default is 4 retransmissions. join This timeout specifies in milliseconds how long to wait for join messages in the membership protocol. The default is 50 milliseconds. send_join This timeout specifies in milliseconds an upper range between 0 and send_join to wait before sending a join message. For configurations with less then 32 nodes, this parameter is not necessary. For larger rings, this parameter is necessary to ensure the NIC is not overflowed with join messages on formation of a new ring. A reasonable value for large rings (128 nodes) would be 80msec. Other timer values must also change if this value is changed. Seek advice from the corosync mailing list if trying to run larger configurations. The default is 0 milliseconds. consensus This timeout specifies in milliseconds how long to wait for consensus to be achieved before starting a new round of membership configuration. The minimum value for consensus must be 1.2 * token. This value will be automatically calculated at 1.2 * token if the user doesn't specify a consensus value. For two node clusters, a consensus larger then the join timeout but less then token is safe. For three node or larger clusters, consensus should be larger then token. There is an increasing risk of odd membership changes, which stil guarantee virtual synchrony, as node count grows if consensus is less than token. The default is 1200 milliseconds. merge This timeout specifies in milliseconds how long to wait before checking for a partition when no multicast traffic is being sent. If multicast traffic is being sent, the merge detection happens automatically as a function of the protocol. The default is 200 milliseconds. downcheck This timeout specifies in milliseconds how long to wait before checking that a network interface is back up after it has been downed. The default is 1000 millseconds. 
fail_recv_const This constant specifies how many rotations of the token without receiving any of the messages when messages should be received may occur before a new configuration is formed. The default is 2500 failures to receive a message. seqno_unchanged_const This constant specifies how many rotations of the token without any multicast traffic should occur before the merge detection timeout is started. The default is 30 rotations. heartbeat_failures_allowed [HeartBeating mechanism] Configures the optional HeartBeating mechanism for faster failure detection. Keep in mind that engaging this mechanism in lossy networks could cause faulty loss declaration as the mechanism relies on the network for heartbeating. So as a rule of thumb use this mechanism if you require improved failure in low to medium utilized networks. This constant specifies the number of heartbeat failures the system should tolerate before declaring heartbeat failure e.g 3. Also if this value is not set or is 0 then the heartbeat mechanism is not engaged in the system and token rotation is the method of failure detection The default is 0 (disabled). max_network_delay [HeartBeating mechanism] This constant specifies in milliseconds the approximate delay that your network takes to transport one packet from one machine to another. This value is to be set by system engineers and please dont change if not sure as this effects the failure detection mechanism using heartbeat. The default is 50 milliseconds. window_size This constant specifies the maximum number of messages that may be sent on one token rotation. If all processors perform equally well, this value could be large (300), which would introduce higher latency from origination to delivery for very large rings. To reduce latency in large rings(16+), the defaults are a safe compromise. If 1 or more slow processor(s) are present among fast processors, window_size should be no larger then 256000 / netmtu to avoid overflow of the kernel receive buffers. The user is notified of this by the display of a retransmit list in the notification logs. There is no loss of data, but performance is reduced when these errors occur. The default is 50 messages. max_messages This constant specifies the maximum number of messages that may be sent by one processor on receipt of the token. The max_messages parameter is limited to 256000 / netmtu to prevent overflow of the kernel transmit buffers. The default is 17 messages. miss_count_const This constant defines the maximum number of times on receipt of a token a message is checked for retransmission before a retransmission occurs. This parameter is useful to modify for switches that delay multicast packets compared to unicast packets. The default setting works well for nearly all modern switches. The default is 5 messages. rrp_problem_count_timeout This specifies the time in milliseconds to wait before decrementing the problem count by 1 for a particular ring to ensure a link is not marked faulty for transient network failures. The default is 2000 milliseconds. rrp_problem_count_threshold This specifies the number of times a problem is detected with a link before setting the link faulty. Once a link is set faulty, no more data is transmitted upon it. Also, the problem counter is no longer decremented when the problem count timeout expires. A problem is detected whenever all tokens from the proceeding processor have not been received within the rrp_token_expired_timeout. 
The rrp_problem_count_threshold * rrp_token_expired_timeout should be atleast 50 milliseconds less then the token timeout, or a complete reconfiguration may occur. The default is 10 problem counts. rrp_problem_count_mcast_threshold This specifies the number of times a problem is detected with multicast before setting the link faulty for passive rrp mode. This variable is unused in active rrp mode. The default is 10 times rrp_problem_count_threshold. rrp_token_expired_timeout This specifies the time in milliseconds to increment the problem counter for the redundant ring protocol after not having received a token from all rings for a particular processor. This value will automatically be calculated from the token timeout and problem_count_threshold but may be overridden. It is not recommended to override this value without guidance from the corosync community. The default is 47 milliseconds. rrp_autorecovery_check_timeout This specifies the time in milliseconds to check if the failed ring can be auto-recovered. The default is 1000 milliseconds. Within the logging directive, there are several configuration options which are all optional. The following 3 options are valid only for the top level logging directive: timestamp This specifies that a timestamp is placed on all log messages. The default is off. fileline This specifies that file and line should be printed. The default is off. function_name This specifies that the code function name should be printed. The default is off. The following options are valid both for top level logging directive and they can be overriden in logger_subsys entries. to_stderr to_logfile to_syslog These specify the destination of logging output. Any combination of these options may be specified. Valid options are yes and no. The default is syslog and stderr. Please note, if you are using to_logfile and want to rotate the file, use logrotate(8) with the option copytruncate. eg. /var/log/corosync.log { missingok compress notifempty daily rotate 7 copytruncate } logfile If the to_logfile directive is set to yes , this option specifies the pathname of the log file. No default. logfile_priority This specifies the logfile priority for this particular subsystem. Ignored if debug is on. Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning. The default is: info. syslog_facility This specifies the syslog facility type that will be used for any messages sent to syslog. options are daemon, local0, local1, local2, local3, local4, local5, local6 & local7. The default is daemon. syslog_priority This specifies the syslog level for this particular subsystem. Ignored if debug is on. Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning. The default is: info. debug This specifies whether debug output is logged for this particular logger. The default is off. tags This specifies which tags should be traced for this particular logger. Set debug directive to on in order to enable tracing using tags. Values are specified using a vertical bar as a logical OR separator: enter|leave|trace1|trace2|trace3|... The default is none. Within the logging directive, logger_subsys directives are optional. Within the logger_subsys sub-directive, all of the above logging configuration options are valid and can be used to override the default settings. The subsys entry, described below, is mandatory to identify the subsystem. subsys This specifies the subsystem identity (name) for which logging is specified. 
This is the name used by a service in the log_init () call. E.g. 'CKPT'. This directive is required. FILES /etc/corosync.conf The corosync executive configuration file. SEE ALSO corosync_overview8, logrotate8 doclifter-2.11/tests/xoxc.man0000664000175000017500000000144112152465736014411 0ustar esresr.Dt MORE(1) "" "User Commands" .Os util-linux .Sh NAME .Nm more .Nd file perusal filter for crt viewing .Sh DESCRIPTION This is a partial manual page stripped down to exhibit translation of the Xo and Xc macros. .Bl -tag -width Ic .It Ic h No or Ic ? Help: display a summary of these commands. If you forget all the other commands, remember this one. .It Ic SPACE Display next k lines of text. Defaults to current screen size. .It Ic z Display next k lines of text. Defaults to current screen size. Argument becomes new default. .It Ic RETURN Display next k lines of text. Defaults to 1. Argument becomes new default. .It Ic d No or Ic \&^D Scroll k lines. Default is current scroll size, initially 11. Argument becomes new default. .It Xo .Ic q .No or .Ic Q .No or .Ic INTERRUPT .Xc Exit. .El doclifter-2.11/doclifter.xml0000664000175000017500000012655612152465736014305 0ustar esresr doclifter 1 Aug 16 2001 doclifter doclifter Documentation Tools doclifter translate troff requests into DocBook doclifter -e encoding -h hintfile -q -x -v -w -V -D token=type -I path -I path file Description doclifter translates documents written in troff macros to DocBook. Structural subsets of the requests in man7, mdoc7, ms7, me7, mm7, and troff1 are supported. The translation brings over all the structure of the original document at section, subsection, and paragraph level. Command and C function synopses are translated into DocBook markup, not just a verbatim display. Tables (TBL markup) are translated into DocBook table markup. PIC diagrams are translated into SVG. Troff-level information that might have structural implications is preserved in XML comments. Where possible, font-change macros are translated into structural markup. doclifter recognizes stereotyped patterns of markup and content (such as the use of italics in a FILES section to mark filenames) and lifts them. A means to edit, add, and save semantic hints about highlighting is supported. Some cliches are recognized and lifted to structural markup even without highlighting. Patterns recognized include such things as URLs, email addresses, man page references, and C program listings. The tag .in and .ti requests are passed through with complaints. They indicate presentation-level markup that doclifter cannot translate into structure; the output will require hand-fixing. The tag .ta is passed through with a complaint unless the immediarely following by text lines contains a tab, in which case the following span of lines containing tabs is lifted to a table. Under some circumstances, doclifter can even lift formatted manual pages and the text output produced by lynx1 from HTML. If it finds no macros in the input, but does find a NAME section header, it tries to interpret the plain text as a manual page (skipping boilerplate headers and footers generated by lynx1). Translations produced in this way will be prone to miss structural features, but this fallback is good enough for simple man pages. doclifter does not do a perfect job, merely a surprisingly good one. Final polish should be applied by a human being capable of recognizing patterns too subtle for a computer. 
But doclifter will almost always produce translations that are good enough to be usable before hand-hacking. See the Troubleshooting section for discussion of how to solve document conversion problems. Options If called without arguments doclifter acts as a filter, translating troff source input on standard input to DocBook markup on standard output. If called with arguments, each argument file is translated separately (but hints are retained, see below); the suffix .xml is given to the translated output. -h Name a file to which information on semantic hints gathered during analysis should be written. -D The allows you to post a hint. This may be useful, for example, if doclifter is mis-parsing a synopsis because it doesn't recognize a token as a command. This hint is merged after hints in the input source have been read. -I The option adds its argument to the include path used when docfilter searches for inclusions. The include path is initially just the current directory. -e The allows you to set the encoding field to be emitted in the output XML. It defaults to ISO-8859-1 (Latin-1). -q Normally, requests that doclifter could not interpret (usually because they're presentation-level) are passed through to XML comments in the output. The -q option suppresses this. It also suppresses listing of macros. Messages about requests that are unrecognized or cannot be translated go to standard error whatever the state of this option. This option is intended to reduce clutter when you believe you have a clean lift of a document and want to lose the troff legacy. -x The -x option requests that doclifter generated DocBook version 5 compatible xml content, rather than its default DocBook version 4.4 output. Inclusions and entities may not be handled correctly with this switch enabled. -v The -v option makes doclifter noisier about what it's doing. This is mainly useful for debugging. -w Enable strict portability checking. Multiple instances of -w increase the strictness. See . -V With this option, the program emits a version message and exits. Translation Rules Overall, you can expect that font changes will be turned into Emphasis macros with a Remap attribute taken from the troff font name. The basic font names are R, I, B, U, CW, and SM. Troff and macro-package special character escapes are mapped into ISO character entities. When doclifter encounters a .so directive, it searches for the file. If it can get read access to the file, and open it, and the file consists entirely of command lines and comments, then it is included. If any of these conditions fails, an entity reference for it is generated. doclifter performs special parsing when it recognizes a display such as is generated by .DS/.DE. It repeatedly tries to parse first a function synopsis, and then plain text off what remains in the display. Thus, most inline C function prototypes will be lifted to structured markup. Some notes on specific translations: Man Translation doclifter does a good job on most man pages, It knows about the extended UR/UE/UN and URL requests supported under Linux. If any .UR request is present, it will translate these but not wrap URLs outide them with Ulink tags. It also knows about the extended .L (literal) font markup from Bell Labs Version 8, and its friends. The .TH macro is used to generate a RefMeta section. If present, the date/source/manual arguments (see man7) are wrapped in RefMiscInfo tag pairs with those class attributes. Note that doclifter does not change the date. 
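As a rough illustration of that mapping (the page name, date, and version string below are hypothetical, and the exact quoting and whitespace of the generated XML will differ), a header line such as
.TH FOO 1 "August 2013" "foo 0.1" "User Commands"
is lifted to something like
<refmeta>
<refentrytitle>FOO</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="date">August 2013</refmiscinfo>
<refmiscinfo class="source">foo 0.1</refmiscinfo>
<refmiscinfo class="manual">User Commands</refmiscinfo>
</refmeta>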
doclifter performs special parsing when it recognizes a synopsis section. It repeatedly tries to parse first a function synopsis, then a command synopsis, and then plain text off what remains in the section. The following man macros are translated into emphasis tags with a remap attribute: .B, .I, .L, .BI, .BR, .BL, .IB, .IR, .IL, .RB, .RI, .RL, .LB, .LI, .LR, .SB, .SM. Some stereotyped patterns involving these macros are recognized and turned into semantic markup. The following macros are translated into paragraph breaks: .LP, .PP, .P, .HP, and the single-argument form of .IP. The two-argument form of .IP is translated either as a VariableList (usually) or ItemizedList (if the tag is the troff bullet or square character). The following macros are translated semantically: .SH,.SS, .TP, .UR, .UE, .UN, .IX. A .UN call just before .SH or .SS sets the ID for the new section. The \*R, \*(Tm, \*(lq, and \*(rq symbols are translated. The following (purely presentation-level) macros are ignored: .PD,.DT. The .RS/.RE macros are translated differently depending on whether or not they precede list markup. When .RS occurs just before .TP or .IP the result is nested lists. Otherwise, the .RS/.RE pair is translated into a Blockquote tag-pair. .DS/.DE is not part of the documented man macro set, but is recognized because it shows up with some frequency on legacy man pages from older Unixes. Certain extension macros originally defined under Ultrix are translated structurally, including those that occasionally show up on the manual pages of Linux and other open-source Unixes. .EX/.EE (and the synonyms .Ex/.Ee), .Ds/.De, .NT/.NE, .PN, and .MS are translated structurally. The following extension macros used by the X distribution are also recognized and translated structurally: .FD, .FN, .IN, .ZN, .hN, and .C{/.C} The .TA and .IN requests are ignored. When the man macros are active, any .Pp macro definition containing the request .PP will be ignored. and all instances of .Pp replaced with .PP. Similarly, .Tp will be replaced with .TP. This is the least painful way to deal with some frequently-encountered stereotyped wrapper definitions that would otherwise cause serious interpretation problems Known problem areas with man translation: Weird uses of .TP. These will sometime generate invalid XML and sometimes result in a FIXME comment in the generated XML (a warning message will also go to standard error). It is debatable how the man macros .HP and .IP without tag should be translated. We treat them as an ordinary paragraph break. We could visually simulate a hanging paragraph with list markup, but this would not be a structural translation. Pod2man Translation doclifter recognizes the extension macros produced by pod2man (.Sh, .Sp, .Ip, .Vb, .Ve) and translates them structurally. The results of lifting pages produced by pod2man should be checked carefully by eyeball, especially the rendering of command and function synopses. Pod2man generates rather perverse markup; doclifter's struggle to untangle it is sometimes in vain. If possible, generate your DocBook from the POD sources. There is a pod2docbook module on CPAN that does this. Tkman Translation doclifter recognizes the extension macros used by the Tcl/Tk documentation system: .AP, .AS, .BS, .BE, .CS, .CE, .DS, .DE, .SO, .SE, .UL, .VS, .VE. The .AP, .CS, .CE, .SO, .SE, .UL, .QW and .PQ macros are translated structurally. 
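To make the man-macro list handling described earlier concrete: a tagged paragraph such as
.TP
.B \-q
Suppress pass-through of untranslated requests.
normally lifts to a variable-list entry, approximately
<variablelist>
<varlistentry>
<term><emphasis remap="B">-q</emphasis></term>
<listitem><para>Suppress pass-through of untranslated requests.</para></listitem>
</varlistentry>
</variablelist>
(The flag and its description are made up for this sketch; if a command synopsis or a hint has already identified -q as an option, the term may come out wrapped in an option tag rather than a plain emphasis.)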
Mandoc Translation doclifter should be able to do an excellent job on most mdoc7 pages, because this macro package expresses a lot of semantic structure. Known problems with mandoc translation: All .Bd/.Ed display blocks are translated as LiteralLayout tag pairs. Ms Translation doclifter does a good job on most ms pages. One weak spot to watch out for is the generation of Author and Affiliation tags. The heuristics used to mine this information out of the .AU section work for authors who format their names in the way usual for English (e.g. "M. E. Lesk", "Eric S. Raymond") but are quite brittle. For a document to be recognized as containing ms markup, it must have the extension .ms. This avoids problems with false positives. The .TL, .AU, .AI, and .AE macros turn into article metainformation in the expected way. The .PP, .LP, .SH, and .NH macros turn into paragraph and section structure. The tagged form of .IP is translated either as a VariableList (usually) or ItemizedList (if the tag is the troff bullet or square character); the untagged version is treated as an ordinary paragraph break. The .DS/.DE pair is translated to a LiteralLayout tag pair. The .FS/.FE pair is translated to a Footnote tag pair. The .QP/.QS/.QE requests define BlockQuotes. The .UL font change is mapped to U. .SM and .LG become numeric plus or minus size steps suffixed to the Remap attribute. The .B1 and .B2 box macros are translated to a Sidebar tag pair. All macros relating to page footers, multicolumn mode, and keeps are ignored (.ND, .DA, .1C, .2C, .MC, .BX, .KS, .KE, .KF). The .R, .RS, and .RE macros are ignored as well. Me Translation Translation of me documents tends to produce crude results that need a lot of hand-hacking. The format has little usable structure, and documents written in it tend to use a lot of low-level troff macros; both these properties tend to confuse doclifter. For a document to be recognized as containing me markup, it must have the extension .me. This avoids problems with false positives. The following macros are translated into paragraph breaks: .lp, .pp. The .ip macro is translated into a VariableList. The .bp macro is translated into an ItemizedList. The .np macro is translated into an OrderedList. The b, i, and r fonts are mapped to emphasis tags with B, I, and R Remap attributes. The .rb ("real bold") font is treated the same as .b. .q(/.q) is translated structurally . Most other requests are ignored. Mm Translation Memorandum Macros documents translate well, as these macros carry a lot of structural information. The translation rules are tuned for Memorandum or Released Paper styles; information associated with external-letter style will be preserved in comments. For a document to be recognized as containing mm markup, it must have the extension .mm. This avoids problems with false positives. The following highlight macros are translated int Emphasis tags: .B, .I, .R, .BI, .BR, .IB, .IR, .RB, .RI. The following macros are structurally translated: .AE, .AF, .AL, .RL, .APP, .APPSK, .AS, .AT, .AU, .B1, .B2, .BE, .BL, .ML, .BS, .BVL, .VL, .DE, .DL .DS, .FE, .FS, .H, .HU, .IA, .IE, .IND, .LB, .LC, .LE, .LI, .P, .RF, .SM, .TL, .VERBOFF, .VERBON, .WA, .WE. The following macros are ignored:  .)E, .1C, .2C, .AST, .AV, .AVL, .COVER, .COVEND, .EF, .EH, .EDP, .EPIC, .FC, .FD, .HC, .HM, .GETR, .GETST, .HM, .INITI, .INITR, .INDP, .ISODATE, .MT, .NS, .ND, .OF, .OH, .OP, .PGFORM, .PGNH, .PE, .PF, .PH, .RP, .S, .SA, .SP, .SG, .SK, .TAB, .TB, .TC, .VM, .WC. 
The following macros generate warnings: .EC, .EX, .FG, .GETHN, .GETPN, .GETR, .GETST, .LT, .LD, .LO, .MOVE, .MULB, .MULN, .MULE, .NCOL, .nP, .PIC, .RD, .RS, .RE, .SETR  .BS/.BE and .IA/.IE pairs are passed through. The text inside them may need to be deleted or moved. The mark argument of .ML is ignored; the following list id formatted as a normal ItemizedList. The contents of .DS/.DE or .DF/.DE gets turned into a Screen display. Arguments controlling presentation-level formatting are ignored. Mwww Translation The mwww macros are an extension to the man macros supported by groff1 for producing web pages. The URL, FTP, MAILTO, FTP, IMAGE, TAG tags are translated structurally. The HTMLINDEX, BODYCOLOR, BACKGROUND, HTML, and LINE tags are ignored. TBL Translation All structural features of TBL tables are translated, including both horizontal and vertical spanning with ‘s’ and ‘^’. The ‘l’, ‘r’, and ‘c’ formats are supported; the ‘n’ column format is rendered as ‘r’. Line continuations with T{ and T} are handled correctly. So is .TH. The expand, box, doublebox, allbox, center, left, and right options are supported. The GNU synonyms frame and doubleframe are also recognized. But the distinction between single and double rules and boxes is lost. Table continuations (.T&) are not supported. If the first nonempty line of text immediately before a table is boldfaced, it is interpreted as a title for the table and the table is generated using a table and title. Otherwise the table is translated with informaltable. Most other presentation-level TBL commands are ignored. The ‘b’ format qualifier is processed, but point size and width qualifiers are not. Pic Translation PIC sections are translated to SVG. doclifter calls out to pic2plot1 to accomplish this; you must have that utility installed for PIC translation to work. Eqn Translation EQN sections are filtered into embedded MathML with eqn -TMathML if possible, otherwise passed through enclosed in LiteralLayout tags. After a delim statement has been seen, inline eqn delimiters are translated into an XML processing instruction. Exception: inline eqn equations consisting of a single character are translated to an Emphasis with a Role attribute of eqn. Troff Translation The troff translation is meant only to support interpretation of the macro sets. It is not useful standalone. The .nf and .fi macros are interpreted as literal-layout boundaries. Calls to the .so macro either cause inclusion or are translated into XML entity inclusions (see above). Calls to the .ul and .cu macros cause following lines to be wrapped in an Emphasis tag with a Remap attribute of "U". Calls to .ft generate corresponding start or end emphasis tags. Calls to .tr cause character translation on output. Calls to .bp generate a BeginPage tag (in paragraphed text only). Calls to .sp generate a paragraph break (in paragraphed text only). Calls to .ti wrap the following line in a BlockQuote These are the only troff requests we translate to DocBook. The rest of the troff emulation exists because macro packages use it internally to expand macros into elements that might be structural. Requests relating to macro definitions and strings (.ds, .as, .de, .am, .rm, .rn, .em) are processed and expanded. The .ig macro is also processed. Conditional macros (.if, .ie, .el) are handled. The built-in conditions o, n, t, e, and c are evaluated as if for nroff on page one of a document. The m, d, and r troff conditionals are also interpreted. 
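For instance (anticipating the evaluation rules stated just below), the portability stanza that the DocBook XSL stylesheets emit at the top of generated manual pages, and which appears verbatim in this distribution's own doclifter.1, is resolved at lift time:
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
Since the numeric test on the .g register counts as true, the .ie branch runs and defines the Aq string as \(aq, the .el branch is dropped, and later references to \*(Aq expand to the defined value.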
String comparisons are evaluated by straight textual comparison. All numeric expressions evaluate to true. The extended groff requests cc, c2, ab, als, do, nop, and return and shift are interpreted. Its .PSPIC extension is translated into a MediaObject. The .tm macro writes its arguments to standard error (with ). The .pm macro reports on defined macros and strings. These facilities may aid in debugging your translation. Some troff escape sequences are lifted: The \e and \\ escapes become a bare backslash, \. a period, and \- a bare dash. The troff escapes \^, \`, \' \&, \0, and \| are lifted to equivalent ISO special spacing characters. A \ followed by space is translated to an ISO non-breaking space entity. A \~ is also translated to an ISO non-breaking space entity; properly this should be a space that can't be used for a linebreak but stretches like ordinary whitepace during line adjustment, but there is no ISO or Unicode entity for that. The \u and \d half-line motion vertical motion escapes, when paired, become Superscript or Subscript tags. The \c escape is handled as a line continuation. in circumstances where that matters (e.g. for token-pasting). The \f escape for font changes is translated in various context-dependent ways. First, doclifter looks for cliches involving font changes that have semantic meaning, and lifts to a structural tag. If it can't do that, it generates an Emphasis tag. The \m[] extension is translated into a phrase span with a remap attribute carrying the color. Note: Stylesheets typically won't render this! Some uses of the \o request are translated: pairs with a letter followed by one of the characters ` ' : ^ o ~ are translated to combining forms with diacriticals acute, grave, umlaut, circumflex, ring, and tilde respectively if the corresponding Latin-1 or Latin-2 character exists as an ISO literal. Other escapes than these will yield warnings or errors. All other troff requests are ignored but passed through into XML comments. A few (such as .ce) also trigger a warning message. Portability Checking When portability checking is enabled, doclifter emits portability warnings about markup which it can handle but which will break various other viewers and interpreters. At level 1, it will warn about constructions that would break man2html1, (the C program distributed with Linux man1, not the older and much less capable Perl script). A close derivative of this code is used in GNOME yelp. This should be the minimum level of portability you aim for, and corresponds to what is recommended on the groff_man7 manual page. At level 2, it will warn about constructions that will break portability back to the Unix classic tools (including long macro names and glyph references with \[]). Semantic analysis doclifter keeps two lists of semantic hints that it picks up from analyzing source documents (especially from parsing command and function synopses). The local list includes: Names of function formal arguments Names of command options Local hints are used to mark up the individual page from which they are gathered. The global list includes: Names of functions Names of commands Names of function return types If doclifter is applied to multiple files, the global list is retained in memory. You can dump a report of global hints at the end of the run with the option. 
The format of the hints is as follows:  .\" | mark <phrase> as <markup> where <phrase> is an item of text and <markup> is the DocBook markup text it should be wrapped with whenever it appeared either highlighted or as a word surrounded by whitespace in the source text. Hints derived from earlier files are also applied to later ones. This behavior may be useful when lifting collections of documents that apply to a function or command library. What should be more useful is the fact that a hints file dumped with can be one of the file arguments to doclifter; the code detects this special case and does not write XML output for such a file. Thus, a good procedure for lifting a large library is to generate a hints file with a first run, inspect it to delete false positives, and use it as the first input to a second run. It is also possible to include a hints file directly in a troff sourcefile. This may be useful if you want to enrich the file by stages before converting to XML. Troubleshooting doclifter tries to warn about problems that it can can diagnose but not fix by itself. When it says "look for FIXME", do that in the generated XML; the markup around that token may be wrong. Occasionally (less than 2% of the time) doclifter will produce invalid DocBook markup even from correct troff markup. Usually this results from strange constructions in the source page, or macro calls that are beyond the ability of doclifter's macro processor to get right. Here are some things to watch for, and how to fix them: Malformed command synopses. If you get a message that says "command synopsis parse failed", try rewriting the synopsis in your manual page source. The most common cause of failure is unbalanced [] groupings, a bug that can be very difficult to notice by eyeball. To assist with this, the error message includes a token number in parentheses indicating on which token the parse failed. For more information, use the -v option. This will trigger a dump telling you what the command synopsis looked like after preprocessing, and indicate on which token the parse failed (both with a token number and a caret sign inserted in the dump of the synopsis tokens). Try rewriting the synopsis in your manual page source. The most common cause of failure is unbalanced [] groupings, a bug that can be very difficult to notice by eyeball. To assist with this, the error token dump tries to insert ‘$’ at the point of the last nesting-depth increase, but the code that does this is failure-prone. Confusing macro calls. Some manual page authors replace standard requests (like .PP, .SH and .TP) with versions that do different things in nroff and troff environments. While doclifter tries to cope and usually does a good job, the quirks of [nt]roff are legion and confusing macro calls sometimes lead to bad XML being generated. A common symptom of such problems is unclosed Emphasis tags. Malformed list syntax. The manual-page parser can be confused by .TP constructs that have header tags but no following body. If the XML produced doesn't validate, and the problem seems to be a misplaced listitem tag, try using the verbose (-v) option. This will enable line-numbered warnings that may help you zero in on the problem. Section nesting problems with SS. The message "possible section nesting error" means that the program has seen two adjacent subsection headers. In man pages, subsections don't have a depth argument, so doclifter cannot be certain how subsections should be nested. 
Any subsection heading between the indicated line and the beginning of the next top-level section might be wrong and require correcting by hand. Bad output with no doclifter error message If you're translating a page that uses user-defined macros, and doclifter fails to complain about it but you get bad output, the first thing to do is simplify or eliminate the user-defined macros. Replace them with stock requests where possible. Improving Translation Quality There are a few constructions that are a good idea to check by hand after lifting a page. Look near the BlockQuote tags. The troff temporary indent request (.ti) is translated into a BlockQuote wrapper around the following line. Sometimes LiteralLayout or ProgramListing would be a better translation, but doclifter has no way to know this. It is not possible to unambiguously detect candidates for wrapping in a DocBook option tag in running text. If you care, you'll have to check for these and fix them by hand. Bugs And Limitations About 3% of man pages will either make this program throw error status 1 or generate invalid XML. In almost all such cases the misbehavior is triggered by markup bugs in the source that are too severe to be coped with. Equation number arguments of EQN calls are ignored. The function-synopsis parser is crude (it's not a compiler) and prone to errors. Function-synopsis markup should be checked carefully by a human. If a man page has both paragraphed text in a Synopsis section and also a body section before the Synopsis section, bad things will happen. Running text (e.g., explanatory notes) at the end of a Synopsis section cannot reliably be distinguished from synopsis-syntax markup. (This problem is AI-complete.) Some firewalls put in to cope with common malformations in troff code mean that the tail end of a span between two \f{B,I,U,(CW} or .ft highlight changes may not be completely covered by corresponding Emphasis macros if (for example) the span crosses a boundary between filled and unfilled (.nf/.fi) text. The treatment of conditionals relies on the assumption that conditional macros never generate structural or font-highlight markup that differs between the if and else branches. This appears to be true of all the standard macro packages, but if you roll any of your own macros you're on your own. Macro definitions in a manual page NAME section are not interpreted. Uses of \c for line continuation sometimes are not translated, leaving the \c in the output XML. The program will print a warning when this occurs. It is not possible to unambiguously detect candidates for wrapping in a DocBook option tag in running text. If you care, you'll have to check for these and fix them by hand. The line numbers in doclifter error messages are unreliable in the presence of .EQ/.EN, .PS/.PE, and quantum fluctuations. Old macro sets There is a conflict between Berkeley ms's documented .P1 print-header-on-page request and an undocumented Bell Labs use for displayed program and equation listings. The ms translator uses the Bell Labs interpretation when .P2 is present in the document, and otherwise ignores the request. Return Values On successful completion, the program returns status 0. It returns 1 if some file or standard input could not be translated. It returns 2 if one of the input sources was a .so inclusion.
Note that a zero return does not guarantee that the output is valid DocBook. It will almost always (as in, more than 98% of cases) be syntactically valid XML, but in some rare cases fixups by hand may be necessary to meet the semantics of the DocBook DTD. Validation problems are most likely to occur with complicated list markup. Requirements The pic2plot1 utility must be installed in order to translate PIC diagrams to SVG. See Also man7, mdoc7, ms7, me7, mm7, mwww7, troff1. Author Eric S. Raymond esr@thyrsus.com There is a project web page at http://www.catb.org/~esr/doclifter/. doclifter-2.11/doclifter.10000664000175000017500000010403012152465736013624 0ustar esresr'\" t .\" Title: doclifter .\" Author: [see the "Author" section] .\" Generator: DocBook XSL Stylesheets v1.76.1 .\" Date: 06/01/2013 .\" Manual: Documentation Tools .\" Source: doclifter .\" Language: English .\" .TH "DOCLIFTER" "1" "06/01/2013" "doclifter" "Documentation Tools" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" http://bugs.debian.org/507673 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" doclifter \- translate troff requests into DocBook .SH "SYNOPSIS" .HP \w'\fBdoclifter\fR\ 'u \fBdoclifter\fR [\-e\ \fIencoding\fR] [\-h\ \fIhintfile\fR] [\-q] [\-x] [\-v] [\-w] [\-V] [\-D\ \fItoken=type\fR] [\-I\ \fIpath\fR] [\-I\ \fIpath\fR] \fIfile\fR... .SH "DESCRIPTION" .PP \fBdoclifter\fR translates documents written in troff macros to DocBook\&. Structural subsets of the requests in \fBman\fR(7), \fBmdoc\fR(7), \fBms\fR(7), \fBme\fR(7), \fBmm\fR(7), and \fBtroff\fR(1) are supported\&. .PP The translation brings over all the structure of the original document at section, subsection, and paragraph level\&. Command and C function synopses are translated into DocBook markup, not just a verbatim display\&. Tables (TBL markup) are translated into DocBook table markup\&. PIC diagrams are translated into SVG\&. Troff\-level information that might have structural implications is preserved in XML comments\&. .PP Where possible, font\-change macros are translated into structural markup\&. \fBdoclifter\fR recognizes stereotyped patterns of markup and content (such as the use of italics in a FILES section to mark filenames) and lifts them\&. A means to edit, add, and save semantic hints about highlighting is supported\&. .PP Some cliches are recognized and lifted to structural markup even without highlighting\&. Patterns recognized include such things as URLs, email addresses, man page references, and C program listings\&. .PP The tag \fB\&.in\fR and \fB\&.ti\fR requests are passed through with complaints\&. They indicate presentation\-level markup that \fBdoclifter\fR cannot translate into structure; the output will require hand\-fixing\&. 
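.\" A hypothetical fragment of the kind meant here is a hand-rolled indent
.\" such as
.\"	.in +5
.\"	Some quoted material.
.\"	.in
.\" There is no structural information for doclifter to recover from this,
.\" so expect a complaint on standard error and plan to choose appropriate
.\" DocBook markup for the passage by hand.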
.PP The tag \fB\&.ta\fR is passed through with a complaint unless the immediarely following by text lines contains a tab, in which case the following span of lines containing tabs is lifted to a table\&. .PP Under some circumstances, \fBdoclifter\fR can even lift formatted manual pages and the text output produced by \fBlynx\fR(1) from HTML\&. If it finds no macros in the input, but does find a NAME section header, it tries to interpret the plain text as a manual page (skipping boilerplate headers and footers generated by \fBlynx\fR(1))\&. Translations produced in this way will be prone to miss structural features, but this fallback is good enough for simple man pages\&. .PP \fBdoclifter\fR does not do a perfect job, merely a surprisingly good one\&. Final polish should be applied by a human being capable of recognizing patterns too subtle for a computer\&. But \fBdoclifter\fR will almost always produce translations that are good enough to be usable before hand\-hacking\&. .PP See the Troubleshooting section for discussion of how to solve document conversion problems\&. .SH "OPTIONS" .PP If called without arguments \fBdoclifter\fR acts as a filter, translating troff source input on standard input to DocBook markup on standard output\&. If called with arguments, each argument file is translated separately (but hints are retained, see below); the suffix \&.xml is given to the translated output\&. .PP \-h .RS 4 Name a file to which information on semantic hints gathered during analysis should be written\&. .RE .PP \-D .RS 4 The \fB\-D\fR allows you to post a hint\&. This may be useful, for example, if \fBdoclifter\fR is mis\-parsing a synopsis because it doesn\*(Aqt recognize a token as a command\&. This hint is merged after hints in the input source have been read\&. .RE .PP \-I .RS 4 The \fB\-I\fR option adds its argument to the include path used when docfilter searches for inclusions\&. The include path is initially just the current directory\&. .RE .PP \-e .RS 4 The \fB\-e\fR allows you to set the encoding field to be emitted in the output XML\&. It defaults to ISO\-8859\-1 (Latin\-1)\&. .RE .PP \-q .RS 4 Normally, requests that \fBdoclifter\fR could not interpret (usually because they\*(Aqre presentation\-level) are passed through to XML comments in the output\&. The \-q option suppresses this\&. It also suppresses listing of macros\&. Messages about requests that are unrecognized or cannot be translated go to standard error whatever the state of this option\&. This option is intended to reduce clutter when you believe you have a clean lift of a document and want to lose the troff legacy\&. .RE .PP \-x .RS 4 The \-x option requests that \fBdoclifter\fR generated DocBook version 5 compatible xml content, rather than its default DocBook version 4\&.4 output\&. Inclusions and entities may not be handled correctly with this switch enabled\&. .RE .PP \-v .RS 4 The \-v option makes \fBdoclifter\fR noisier about what it\*(Aqs doing\&. This is mainly useful for debugging\&. .RE .PP \-w .RS 4 Enable strict portability checking\&. Multiple instances of \-w increase the strictness\&. See the section called \(lqPORTABILITY CHECKING\(rq\&. .RE .PP \-V .RS 4 With this option, the program emits a version message and exits\&. .RE .SH "TRANSLATION RULES" .PP Overall, you can expect that font changes will be turned into Emphasis macros with a Remap attribute taken from the troff font name\&. The basic font names are R, I, B, U, CW, and SM\&. 
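.\" As a hypothetical illustration of the default case: running text like
.\"	the \fBfrobnicate\fR utility
.\" can be expected to come out roughly as
.\"	the <emphasis remap='B'>frobnicate</emphasis> utility
.\" unless one of the cliche patterns described below matches, in which
.\" case a more specific tag such as command, option, or filename is
.\" emitted instead.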
.PP Troff and macro\-package special character escapes are mapped into ISO character entities\&. .PP When \fBdoclifter\fR encounters a \fB\&.so\fR directive, it searches for the file\&. If it can get read access to the file, and open it, and the file consists entirely of command lines and comments, then it is included\&. If any of these conditions fails, an entity reference for it is generated\&. .PP \fBdoclifter\fR performs special parsing when it recognizes a display such as is generated by \fB\&.DS/\&.DE\fR\&. It repeatedly tries to parse first a function synopsis, and then plain text off what remains in the display\&. Thus, most inline C function prototypes will be lifted to structured markup\&. .PP Some notes on specific translations: .SS "Man Translation" .PP \fBdoclifter\fR does a good job on most man pages, It knows about the extended \fBUR\fR/\fBUE\fR/\fBUN\fR and \fBURL\fR requests supported under Linux\&. If any \fB\&.UR\fR request is present, it will translate these but not wrap URLs outide them with Ulink tags\&. It also knows about the extended \fB\&.L\fR (literal) font markup from Bell Labs Version 8, and its friends\&. .PP The \fB\&.TH\fR macro is used to generate a RefMeta section\&. If present, the date/source/manual arguments (see \fBman\fR(7)) are wrapped in RefMiscInfo tag pairs with those class attributes\&. Note that \fBdoclifter\fR does not change the date\&. .PP \fBdoclifter\fR performs special parsing when it recognizes a synopsis section\&. It repeatedly tries to parse first a function synopsis, then a command synopsis, and then plain text off what remains in the section\&. .PP The following man macros are translated into emphasis tags with a remap attribute: \fB\&.B\fR, \fB\&.I\fR, \fB\&.L\fR, \fB\&.BI\fR, \fB\&.BR\fR, \fB\&.BL\fR, \fB\&.IB\fR, \fB\&.IR\fR, \fB\&.IL\fR, \fB\&.RB\fR, \fB\&.RI\fR, \fB\&.RL\fR, \fB\&.LB\fR, \fB\&.LI\fR, \fB\&.LR\fR, \fB\&.SB\fR, \fB\&.SM\fR\&. Some stereotyped patterns involving these macros are recognized and turned into semantic markup\&. .PP The following macros are translated into paragraph breaks: \fB\&.LP\fR, \fB\&.PP\fR, \fB\&.P\fR, \fB\&.HP\fR, and the single\-argument form of \fB\&.IP\fR\&. .PP The two\-argument form of \fB\&.IP\fR is translated either as a VariableList (usually) or ItemizedList (if the tag is the troff bullet or square character)\&. .PP The following macros are translated semantically: \fB\&.SH\fR,\fB\&.SS\fR, \fB\&.TP\fR, \fB\&.UR\fR, \fB\&.UE\fR, \fB\&.UN\fR, \fB\&.IX\fR\&. A \fB\&.UN\fR call just before \fB\&.SH\fR or \fB\&.SS\fR sets the ID for the new section\&. .PP The \fB\e*R\fR, \fB\e*(Tm\fR, \fB\e*(lq\fR, and \fB\e*(rq\fR symbols are translated\&. .PP The following (purely presentation\-level) macros are ignored: \fB\&.PD\fR,\fB\&.DT\fR\&. .PP The \fB\&.RS\fR/\fB\&.RE\fR macros are translated differently depending on whether or not they precede list markup\&. When \fB\&.RS\fR occurs just before \fB\&.TP\fR or \fB\&.IP\fR the result is nested lists\&. Otherwise, the \fB\&.RS\fR/\fB\&.RE\fR pair is translated into a Blockquote tag\-pair\&. .PP \fB\&.DS\fR/\fB\&.DE\fR is not part of the documented man macro set, but is recognized because it shows up with some frequency on legacy man pages from older Unixes\&. .PP Certain extension macros originally defined under Ultrix are translated structurally, including those that occasionally show up on the manual pages of Linux and other open\-source Unixes\&. 
\fB\&.EX\fR/\fB\&.EE\fR (and the synonyms \fB\&.Ex\fR/\fB\&.Ee\fR), \fB\&.Ds\fR/\fB\&.De\fR, \fB\&.NT\fR/\fB\&.NE\fR, \fB\&.PN\fR, and \fB\&.MS\fR are translated structurally\&. .PP The following extension macros used by the X distribution are also recognized and translated structurally: \fB\&.FD\fR, \fB\&.FN\fR, \fB\&.IN\fR, \fB\&.ZN\fR, \fB\&.hN\fR, and \fB\&.C{\fR/\fB\&.C}\fR The \fB\&.TA\fR and \fB\&.IN\fR requests are ignored\&. .PP When the man macros are active, any \fB\&.Pp\fR macro definition containing the request \fB\&.PP\fR will be ignored\&. and all instances of \fB\&.Pp\fR replaced with \fB\&.PP\fR\&. Similarly, \fB\&.Tp\fR will be replaced with \fB\&.TP\fR\&. This is the least painful way to deal with some frequently\-encountered stereotyped wrapper definitions that would otherwise cause serious interpretation problems .PP Known problem areas with man translation: .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Weird uses of \fB\&.TP\fR\&. These will sometime generate invalid XML and sometimes result in a FIXME comment in the generated XML (a warning message will also go to standard error)\&. .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} It is debatable how the man macros \fB\&.HP\fR and \fB\&.IP\fR without tag should be translated\&. We treat them as an ordinary paragraph break\&. We could visually simulate a hanging paragraph with list markup, but this would not be a structural translation\&. .RE .SS "Pod2man Translation" .PP \fBdoclifter\fR recognizes the extension macros produced by \fBpod2man\fR (\fB\&.Sh\fR, \fB\&.Sp\fR, \fB\&.Ip\fR, \fB\&.Vb\fR, \fB\&.Ve\fR) and translates them structurally\&. .PP The results of lifting pages produced by \fBpod2man\fR should be checked carefully by eyeball, especially the rendering of command and function synopses\&. \fBPod2man\fR generates rather perverse markup; \fBdoclifter\fR\*(Aqs struggle to untangle it is sometimes in vain\&. .PP If possible, generate your DocBook from the POD sources\&. There is a pod2docbook module on CPAN that does this\&. .SS "Tkman Translation" .PP \fBdoclifter\fR recognizes the extension macros used by the Tcl/Tk documentation system: \fB\&.AP\fR, \fB\&.AS\fR, \fB\&.BS\fR, \fB\&.BE\fR, \fB\&.CS\fR, \fB\&.CE\fR, \fB\&.DS\fR, \fB\&.DE\fR, \fB\&.SO\fR, \fB\&.SE\fR, \fB\&.UL\fR, \fB\&.VS\fR, \fB\&.VE\fR\&. The \fB\&.AP\fR, \fB\&.CS\fR, \fB\&.CE\fR, \fB\&.SO\fR, \fB\&.SE\fR, \fB\&.UL\fR, \fB\&.QW\fR and \fB\&.PQ\fR macros are translated structurally\&. .SS "Mandoc Translation" .PP \fBdoclifter\fR should be able to do an excellent job on most \fBmdoc\fR(7) pages, because this macro package expresses a lot of semantic structure\&. .PP Known problems with mandoc translation: All \fB\&.Bd\fR/\fB\&.Ed\fR display blocks are translated as LiteralLayout tag pairs \&. .SS "Ms Translation" .PP \fBdoclifter\fR does a good job on most ms pages\&. One weak spot to watch out for is the generation of Author and Affiliation tags\&. The heuristics used to mine this information out of the \fB\&.AU\fR section work for authors who format their names in the way usual for English (e\&.g\&. "M\&. E\&. Lesk", "Eric S\&. Raymond") but are quite brittle\&. .PP For a document to be recognized as containing ms markup, it must have the extension \&.ms\&. This avoids problems with false positives\&. .PP The \fB\&.TL\fR, \fB\&.AU\fR, \fB\&.AI\fR, and \fB\&.AE\fR macros turn into article metainformation in the expected way\&. 
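.\" For instance (a made-up front matter; remember the file itself must
.\" carry the .ms extension to be recognized):
.\"	.TL
.\"	A Sample Paper
.\"	.AU
.\"	J. Random Author
.\"	.AI
.\"	University of Erewhon
.\"	.AE
.\" should yield the article title, author, and affiliation
.\" metainformation, provided the author name is formatted in the English
.\" style the heuristics expect.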
The \fB\&.PP\fR, \fB\&.LP\fR, \fB\&.SH\fR, and \fB\&.NH\fR macros turn into paragraph and section structure\&. The tagged form of \fB\&.IP\fR is translated either as a VariableList (usually) or ItemizedList (if the tag is the troff bullet or square character); the untagged version is treated as an ordinary paragraph break\&. .PP The \fB\&.DS\fR/\fB\&.DE\fR pair is translated to a LiteralLayout tag pair \&. The \fB\&.FS\fR/\fB\&.FE\fR pair is translated to a Footnote tag pair\&. The \fB\&.QP\fR/\fB\&.QS\fR/\fB\&.QE\fR requests define BlockQuotes\&. .PP The \fB\&.UL\fR font change is mapped to U\&. \fB\&.SM\fR and \fB\&.LG\fR become numeric plus or minus size steps suffixed to the Remap attribute\&. .PP The \fB\&.B1\fR and \fB\&.B2\fR box macros are translated to a Sidebar tag pair\&. .PP All macros relating to page footers, multicolumn mode, and keeps are ignored (\fB\&.ND\fR, \fB\&.DA\fR, \fB\&.1C\fR, \fB\&.2C\fR, \fB\&.MC\fR, \fB\&.BX\fR, \fB\&.KS\fR, \fB\&.KE\fR, \fB\&.KF\fR)\&. The \fB\&.R\fR, \fB\&.RS\fR, and \fB\&.RE\fR macros are ignored as well\&. .SS "Me Translation" .PP Translation of me documents tends to produce crude results that need a lot of hand\-hacking\&. The format has little usable structure, and documents written in it tend to use a lot of low\-level troff macros; both these properties tend to confuse \fBdoclifter\fR\&. .PP For a document to be recognized as containing me markup, it must have the extension \&.me\&. This avoids problems with false positives\&. .PP The following macros are translated into paragraph breaks: \fB\&.lp\fR, \fB\&.pp\fR\&. The \fB\&.ip\fR macro is translated into a VariableList\&. The \fB\&.bp\fR macro is translated into an ItemizedList\&. The \fB\&.np\fR macro is translated into an OrderedList\&. .PP The b, i, and r fonts are mapped to emphasis tags with B, I, and R Remap attributes\&. The \fB\&.rb\fR ("real bold") font is treated the same as \fB\&.b\fR\&. .PP \fB\&.q(\fR/\fB\&.q)\fR is translated structurally \&. .PP Most other requests are ignored\&. .SS "Mm Translation" .PP Memorandum Macros documents translate well, as these macros carry a lot of structural information\&. The translation rules are tuned for Memorandum or Released Paper styles; information associated with external\-letter style will be preserved in comments\&. .PP For a document to be recognized as containing mm markup, it must have the extension \&.mm\&. This avoids problems with false positives\&. .PP The following highlight macros are translated int Emphasis tags: \fB\&.B\fR, \fB\&.I\fR, \fB\&.R\fR, \fB\&.BI\fR, \fB\&.BR\fR, \fB\&.IB\fR, \fB\&.IR\fR, \fB\&.RB\fR, \fB\&.RI\fR\&. .PP The following macros are structurally translated: \fB\&.AE\fR, \fB\&.AF\fR, \fB\&.AL\fR, \fB\&.RL\fR, \fB\&.APP\fR, \fB\&.APPSK\fR, \fB\&.AS\fR, \fB\&.AT\fR, \fB\&.AU\fR, \fB\&.B1\fR, \fB\&.B2\fR, \fB\&.BE\fR, \fB\&.BL\fR, \fB\&.ML\fR, \fB\&.BS\fR, \fB\&.BVL\fR, \fB\&.VL\fR, \fB\&.DE\fR, \fB\&.DL\fR \fB\&.DS\fR, \fB\&.FE\fR, \fB\&.FS\fR, \fB\&.H\fR, \fB\&.HU\fR, \fB\&.IA\fR, \fB\&.IE\fR, \fB\&.IND\fR, \fB\&.LB\fR, \fB\&.LC\fR, \fB\&.LE\fR, \fB\&.LI\fR, \fB\&.P\fR, \fB\&.RF\fR, \fB\&.SM\fR, \fB\&.TL\fR, \fB\&.VERBOFF\fR, \fB\&.VERBON\fR, \fB\&.WA\fR, \fB\&.WE\fR\&. 
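.\" As a made-up illustration of how much structure these carry, a fragment
.\" like
.\"	.H 1 "Overview"
.\"	.P
.\"	Introductory text.
.\"	.BL
.\"	.LI
.\"	first point
.\"	.LI
.\"	second point
.\"	.LE
.\" should come through with .H as a section, .P as a paragraph break, and
.\" the .BL/.LI/.LE trio as an itemized list.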
.PP The following macros are ignored: .PP \ \&\fB\&.)E\fR, \fB\&.1C\fR, \fB\&.2C\fR, \fB\&.AST\fR, \fB\&.AV\fR, \fB\&.AVL\fR, \fB\&.COVER\fR, \fB\&.COVEND\fR, \fB\&.EF\fR, \fB\&.EH\fR, \fB\&.EDP\fR, \fB\&.EPIC\fR, \fB\&.FC\fR, \fB\&.FD\fR, \fB\&.HC\fR, \fB\&.HM\fR, \fB\&.GETR\fR, \fB\&.GETST\fR, \fB\&.HM\fR, \fB\&.INITI\fR, \fB\&.INITR\fR, \fB\&.INDP\fR, \fB\&.ISODATE\fR, \fB\&.MT\fR, \fB\&.NS\fR, \fB\&.ND\fR, \fB\&.OF\fR, \fB\&.OH\fR, \fB\&.OP\fR, \fB\&.PGFORM\fR, \fB\&.PGNH\fR, \fB\&.PE\fR, \fB\&.PF\fR, \fB\&.PH\fR, \fB\&.RP\fR, \fB\&.S\fR, \fB\&.SA\fR, \fB\&.SP\fR, \fB\&.SG\fR, \fB\&.SK\fR, \fB\&.TAB\fR, \fB\&.TB\fR, \fB\&.TC\fR, \fB\&.VM\fR, \fB\&.WC\fR\&. .PP The following macros generate warnings: \fB\&.EC\fR, \fB\&.EX\fR, \fB\&.FG\fR, \fB\&.GETHN\fR, \fB\&.GETPN\fR, \fB\&.GETR\fR, \fB\&.GETST\fR, \fB\&.LT\fR, \fB\&.LD\fR, \fB\&.LO\fR, \fB\&.MOVE\fR, \fB\&.MULB\fR, \fB\&.MULN\fR, \fB\&.MULE\fR, \fB\&.NCOL\fR, \fB\&.nP\fR, \fB\&.PIC\fR, \fB\&.RD\fR, \fB\&.RS\fR, \fB\&.RE\fR, \fB\&.SETR\fR .PP \ \&\fB\&.BS\fR/\fB\&.BE\fR and \fB\&.IA\fR/\fB\&.IE\fR pairs are passed through\&. The text inside them may need to be deleted or moved\&. .PP The mark argument of \fB\&.ML\fR is ignored; the following list id formatted as a normal ItemizedList\&. .PP The contents of \fB\&.DS\fR/\fB\&.DE\fR or \fB\&.DF\fR/\fB\&.DE\fR gets turned into a Screen display\&. Arguments controlling presentation\-level formatting are ignored\&. .SS "Mwww Translation" .PP The mwww macros are an extension to the man macros supported by \fBgroff\fR(1) for producing web pages\&. .PP The \fBURL\fR, \fBFTP\fR, \fBMAILTO\fR, \fBFTP\fR, \fBIMAGE\fR, \fBTAG\fR tags are translated structurally\&. The \fBHTMLINDEX\fR, \fBBODYCOLOR\fR, \fBBACKGROUND\fR, \fBHTML\fR, and \fBLINE\fR tags are ignored\&. .SS "TBL Translation" .PP All structural features of TBL tables are translated, including both horizontal and vertical spanning with \(oqs\(cq and \(oq^\(cq\&. The \(oql\(cq, \(oqr\(cq, and \(oqc\(cq formats are supported; the \(oqn\(cq column format is rendered as \(oqr\(cq\&. Line continuations with T{ and T} are handled correctly\&. So is \fB\&.TH\fR\&. .PP The \fBexpand\fR, \fBbox\fR, \fBdoublebox\fR, \fBallbox\fR, \fBcenter\fR, \fBleft\fR, and \fBright\fR options are supported\&. The GNU synonyms \fBframe\fR and \fBdoubleframe\fR are also recognized\&. But the distinction between single and double rules and boxes is lost\&. .PP Table continuations (\&.T&) are not supported\&. .PP If the first nonempty line of text immediately before a table is boldfaced, it is interpreted as a title for the table and the table is generated using a table and title\&. Otherwise the table is translated with informaltable\&. .PP Most other presentation\-level TBL commands are ignored\&. The \(oqb\(cq format qualifier is processed, but point size and width qualifiers are not\&. .SS "Pic Translation" .PP PIC sections are translated to SVG\&. doclifter calls out to \fBpic2plot\fR(1) to accomplish this; you must have that utility installed for PIC translation to work\&. .SS "Eqn Translation" .PP EQN sections are filtered into embedded MathML with \fBeqn \-TMathML\fR if possible, otherwise passed through enclosed in LiteralLayout tags\&. After a delim statement has been seen, inline eqn delimiters are translated into an XML processing instruction\&. Exception: inline eqn equations consisting of a single character are translated to an Emphasis with a Role attribute of eqn\&. 
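.\" A small hypothetical example of the eqn handling:
.\"	.EQ
.\"	delim $$
.\"	.EN
.\"	The variance is $sigma sup 2$ and the mean is $x bar$.
.\" Displayed .EQ/.EN blocks are filtered through eqn -TMathML where that
.\" works, and once the delim statement has been seen the inline $...$
.\" equations are replaced by processing instructions (or, for
.\" single-character equations, by an Emphasis with a Role of eqn).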
.SS "Troff Translation" .PP The troff translation is meant only to support interpretation of the macro sets\&. It is not useful standalone\&. .PP The \fB\&.nf\fR and \fB\&.fi\fR macros are interpreted as literal\-layout boundaries\&. Calls to the \fB\&.so\fR macro either cause inclusion or are translated into XML entity inclusions (see above)\&. Calls to the \fB\&.ul\fR and \fB\&.cu\fR macros cause following lines to be wrapped in an Emphasis tag with a Remap attribute of "U"\&. Calls to \fB\&.ft\fR generate corresponding start or end emphasis tags\&. Calls to \fB\&.tr\fR cause character translation on output\&. Calls to \fB\&.bp\fR generate a BeginPage tag (in paragraphed text only)\&. Calls to \fB\&.sp\fR generate a paragraph break (in paragraphed text only)\&. Calls to \fB\&.ti\fR wrap the following line in a BlockQuote These are the only troff requests we translate to DocBook\&. The rest of the troff emulation exists because macro packages use it internally to expand macros into elements that might be structural\&. .PP Requests relating to macro definitions and strings (\fB\&.ds\fR, \fB\&.as\fR, \fB\&.de\fR, \fB\&.am\fR, \fB\&.rm\fR, \fB\&.rn\fR, \fB\&.em\fR) are processed and expanded\&. The \fB\&.ig\fR macro is also processed\&. .PP Conditional macros (\fB\&.if\fR, \fB\&.ie\fR, \fB\&.el\fR) are handled\&. The built\-in conditions o, n, t, e, and c are evaluated as if for nroff on page one of a document\&. The m, d, and r troff conditionals are also interpreted\&. String comparisons are evaluated by straight textual comparison\&. All numeric expressions evaluate to true\&. .PP The extended groff requests \fBcc\fR, \fBc2\fR, \fBab\fR, \fBals\fR, \fBdo\fR, \fBnop\fR, and \fBreturn\fR and \fBshift\fR are interpreted\&. Its \fB\&.PSPIC\fR extension is translated into a MediaObject\&. .PP The \fB\&.tm\fR macro writes its arguments to standard error (with \fB\-t\fR)\&. The \fB\&.pm\fR macro reports on defined macros and strings\&. These facilities may aid in debugging your translation\&. .PP Some troff escape sequences are lifted: .sp .RS 4 .ie n \{\ \h'-04' 1.\h'+01'\c .\} .el \{\ .sp -1 .IP " 1." 4.2 .\} The \ee and \e\e escapes become a bare backslash, \e\&. a period, and \e\- a bare dash\&. .RE .sp .RS 4 .ie n \{\ \h'-04' 2.\h'+01'\c .\} .el \{\ .sp -1 .IP " 2." 4.2 .\} The troff escapes \e^, \e`, \e\*(Aq \e&, \e0, and \e| are lifted to equivalent ISO special spacing characters\&. .RE .sp .RS 4 .ie n \{\ \h'-04' 3.\h'+01'\c .\} .el \{\ .sp -1 .IP " 3." 4.2 .\} A \e followed by space is translated to an ISO non\-breaking space entity\&. .RE .sp .RS 4 .ie n \{\ \h'-04' 4.\h'+01'\c .\} .el \{\ .sp -1 .IP " 4." 4.2 .\} A \e~ is also translated to an ISO non\-breaking space entity; properly this should be a space that can\*(Aqt be used for a linebreak but stretches like ordinary whitepace during line adjustment, but there is no ISO or Unicode entity for that\&. .RE .sp .RS 4 .ie n \{\ \h'-04' 5.\h'+01'\c .\} .el \{\ .sp -1 .IP " 5." 4.2 .\} The \eu and \ed half\-line motion vertical motion escapes, when paired, become \fBSuperscript\fR or \fBSubscript\fR tags\&. .RE .sp .RS 4 .ie n \{\ \h'-04' 6.\h'+01'\c .\} .el \{\ .sp -1 .IP " 6." 4.2 .\} The \ec escape is handled as a line continuation\&. in circumstances where that matters (e\&.g\&. for token\-pasting)\&. .RE .sp .RS 4 .ie n \{\ \h'-04' 7.\h'+01'\c .\} .el \{\ .sp -1 .IP " 7." 4.2 .\} The \ef escape for font changes is translated in various context\-dependent ways\&. 
First, \fBdoclifter\fR looks for cliches involving font changes that have semantic meaning, and lifts to a structural tag\&. If it can\*(Aqt do that, it generates an Emphasis tag\&. .RE .sp .RS 4 .ie n \{\ \h'-04' 8.\h'+01'\c .\} .el \{\ .sp -1 .IP " 8." 4.2 .\} The \em[] extension is translated into a phrase span with a remap attribute carrying the color\&. Note: Stylesheets typically won\*(Aqt render this! .RE .sp .RS 4 .ie n \{\ \h'-04' 9.\h'+01'\c .\} .el \{\ .sp -1 .IP " 9." 4.2 .\} Some uses of the \eo request are translated: pairs with a letter followed by one of the characters ` \*(Aq : ^ o ~ are translated to combining forms with diacriticals acute, grave, umlaut, circumflex, ring, and tilde respectively if the corresponding Latin\-1 or Latin\-2 character exists as an ISO literal\&. .RE .PP Other escapes than these will yield warnings or errors\&. .PP All other troff requests are ignored but passed through into XML comments\&. A few (such as \fB\&.ce\fR) also trigger a warning message\&. .SH "PORTABILITY CHECKING" .PP When portability checking is enabled, \fBdoclifter\fR emits portability warnings about markup which it can handle but which will break various other viewers and interpreters\&. .sp .RS 4 .ie n \{\ \h'-04' 1.\h'+01'\c .\} .el \{\ .sp -1 .IP " 1." 4.2 .\} At level 1, it will warn about constructions that would break \fBman2html\fR(1), (the C program distributed with Linux \fBman\fR(1), not the older and much less capable Perl script)\&. A close derivative of this code is used in GNOME yelp\&. This should be the minimum level of portability you aim for, and corresponds to what is recommended on the \fBgroff_man\fR(7) manual page\&. .RE .sp .RS 4 .ie n \{\ \h'-04' 2.\h'+01'\c .\} .el \{\ .sp -1 .IP " 2." 4.2 .\} At level 2, it will warn about constructions that will break portability back to the Unix classic tools (including long macro names and glyph references with \e[])\&. .RE .SH "SEMANTIC ANALYSIS" .PP \fBdoclifter\fR keeps two lists of semantic hints that it picks up from analyzing source documents (especially from parsing command and function synopses)\&. The local list includes: .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Names of function formal arguments .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Names of command options .RE .PP Local hints are used to mark up the individual page from which they are gathered\&. The global list includes: .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Names of functions .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Names of commands .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Names of function return types .RE .PP If \fBdoclifter\fR is applied to multiple files, the global list is retained in memory\&. You can dump a report of global hints at the end of the run with the \fB\-h\fR option\&. The format of the hints is as follows: .sp .if n \{\ .RS 4 .\} .nf \ \&\&.\e" | mark as .fi .if n \{\ .RE .\} .PP where \fB\fR is an item of text and \fB\fR is the DocBook markup text it should be wrapped with whenever it appeared either highlighted or as a word surrounded by whitespace in the source text\&. .PP Hints derived from earlier files are also applied to later ones\&. This behavior may be useful when lifting collections of documents that apply to a function or command library\&. 
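.\" For example, after parsing a synopsis that declares a function named
.\" frobnicate() (a made-up name), the dumped hints could contain a line like
.\"	.\" | mark frobnicate as function
.\" and later occurrences of that word would then be wrapped in function
.\" markup.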
What should be more useful is the fact that a hints file dumped with \fB\-h\fR can be one of the file arguments to \fBdoclifter\fR; the code detects this special case and does not write XML output for such a file\&. Thus, a good procedure for lifting a large library is to generate a hints file with a first run, inspect it to delete false positives, and use it as the first input to a second run\&. .PP It is also possible to include a hints file directly in a troff sourcefile\&. This may be useful if you want to enrich the file by stages before converting to XML\&. .SH "TROUBLESHOOTING" .PP \fBdoclifter\fR tries to warn about problems that it can can diagnose but not fix by itself\&. When it says "look for FIXME", do that in the generated XML; the markup around that token may be wrong\&. .PP Occasionally (less than 2% of the time) \fBdoclifter\fR will produce invalid DocBook markup even from correct troff markup\&. Usually this results from strange constructions in the source page, or macro calls that are beyond the ability of \fBdoclifter\fR\*(Aqs macro processor to get right\&. Here are some things to watch for, and how to fix them: .SS "Malformed command synopses\&." .PP If you get a message that says "command synopsis parse failed", try rewriting the synopsis in your manual page source\&. The most common cause of failure is unbalanced [] groupings, a bug that can be very difficult to notice by eyeball\&. To assist with this, the error message includes a token number in parentheses indicating on which token the parse failed\&. .PP For more information, use the \-v option\&. This will trigger a dump telling you what the command synopsis looked like after preprocessing, and indicate on which token the parse failed (both with a token number and a caret sign inserted in the dump of the synopsis tokens)\&. Try rewriting the synopsis in your manual page source\&. The most common cause of failure is unbalanced [] groupings, a bug that can be very difficult to notice by eyeball\&. To assist with this, the error token dump tries to insert \(oq$\(cq at the point of the last nesting\-depth increase, but the code that does this is failure\-prone\&. .SS "Confusing macro calls\&." .PP Some manual page authors replace standard requests (like \fB\&.PP\fR, \fB\&.SH\fR and \fB\&.TP\fR) with versions that do different things in \fBnroff\fR and \fBtroff\fR environments\&. While \fBdoclifter\fR tries to cope and usually does a good job, the quirks of [nt]roff are legion and confusing macro calls sometimes lead to bad XML being generated\&. A common symptom of such problems is unclosed Emphasis tags\&. .SS "Malformed list syntax\&." .PP The manual\-page parser can be confused by \fB\&.TP\fR constructs that have header tags but no following body\&. If the XML produced doesn\*(Aqt validate, and the problem seems to be a misplaced listitem tag, try using the verbose (\-v) option\&. This will enable line\-numbered warnings that may help you zero in on the problem\&. .SS "Section nesting problems with SS\&." .PP The message "possible section nesting error" means that the program has seen two adjacent subsection headers\&. In man pages, subsections don\*(Aqt have a depth argument, so \fBdoclifter\fR cannot be certain how subsections should be nested\&. Any subsection heading between the indicated line and the beginning of the next top\-level section might be wrong and require correcting by hand\&. 
.SS "Bad output with no doclifter error message" .PP If you\*(Aqre translating a page that uses user\-defined macros, and doclifter fails to complain about it but you get bad output, the first thing to do is simplify or eliminate the user\-defined macros\&. Replace them with stock requests where possible\&. .SH "IMPROVING TRANSLATION QUALITY" .PP There are a few constructions that are a good idea to check by hand after lifting a page\&. .PP Look near the BlockQuote tags\&. The troff temporary indent request (\fB\&.ti\fR) is translated into a BlockQuote wrapper around the following line\&. Sometimes LiteralLayout or ProgramListing would be a better translation, but \fBdoclifter\fR has no way to know this\&. .PP It is not possible to unambiguously detect candidates for wrapping in a DocBook option tag in running text\&. If you care, you\*(Aqll have to check for these and fix them by hand\&. .SH "BUGS AND LIMITATIONS" .PP About 3% of man pages will either make this program throw error status 1 or generate invalid XML\&. In almost all such cases the misbehavior is triggered by markup bugs in the source that are too severe to be coped with\&. .PP Equation number arguments of EQN calls are ignored\&. .PP The function\-synopsis parser is crude (it\*(Aqs not a compiler) and prone to errors\&. Function\-synopsis markup should be checked carefully by a human\&. .PP If a man page has both paragraphed text in a Synopsis section and also a body section before the Synopis section, bad things will happen\&. .PP Running text (e\&.g\&., explanatory notes) at the end of a Synopsis section cannot reliably be distinguished from synopsis\-syntax markup\&. (This problem is AI\-complete\&.) .PP Some firewalls put in to cope with common malformations in troff code mean that the tail end of a span between two \fB\ef{B,I,U,(CW}\fR or \fB\&.ft\fR highlight changes may not be completely covered by corresponding Emphasis macros if (for example) the span crosses a boundary between filled and unfilled (\fB\&.nf\fR/\fB\&.fi\fR) text\&. .PP The treatment of conditionals relies on the assumption that conditional macros never generate structural or font\-highlight markup that differs between the if and else branches\&. This appears to be true of all the standard macro packages, but if you roll any of your own macros you\*(Aqre on your own\&. .PP Macro definitions in a manual page NAME section are not interpreted\&. .PP Uses of \ec for line continuation sometimes are not translated, leaving the \ec in the output XML\&. The program will print a warning when this occurs\&. .PP It is not possible to unambiguously detect candidates for wrapping in a DocBook option tag in running text\&. If you care, you\*(Aqll have to check for these and fix them by hand\&. .PP The line numbers in \fBdoclifter\fR error messages are unreliable in the presence of \fB\&.EQ/\&.EN\fR, \fB\&.PS/\&.PE\fR, and quantum fluctuations\&. .SH "OLD MACRO SETS" .PP There is a conflict between Berkeley ms\*(Aqs documented \fB\&.P1\fR print\-header\-on\-page request and an undocumented Bell Labs use for displayed program and equation listings\&. The \fBms\fR translator uses the Bell Labs interpretation when \fB\&.P2\fR is present in the document, and otherwise ignores the request\&. .SH "RETURN VALUES" .PP On successful completion, the program returns status 0\&. It returns 1 if some file or standard input could not be translated\&. It returns 2 if one of the input sources was a \fB\&.so\fR inclusion\&. 
It returns 3 if there is an error in reading or writing files\&. It returns 4 to indicate an internal error\&. It returns 5 when aborted by a keyboard interrupt\&. .PP Note that a zero return does not guarantee that the output is valid DocBook\&. It will almost always (as in, more than 98% of cases) be syntactically valid XML, but in some rare cases fixups by hand may be necessary to meet the semantics of the DocBook DTD\&. Validation problems are most likely to occur with complicated list markup\&. .SH "REQUIREMENTS" .PP The \fBpic2plot\fR(1) utility must be installed in order to translate PIC diagrams to SVG\&. .SH "SEE ALSO" .PP \fBman\fR(7), \fBmdoc\fR(7), \fBms\fR(7), \fBme\fR(7), \fBmm\fR(7), \fBmwww\fR(7), \fBtroff\fR(1)\&. .SH "AUTHOR" .PP Eric S\&. Raymond esr@thyrsus\&.com .PP There is a project web page at \m[blue]\fBhttp://www\&.catb\&.org/~esr/doclifter/\fR\m[]\&. doclifter-2.11/doclifter-logo.png0000664000175000017500000001546412152465736015222 0ustar esresr‰PNG  IHDR@?PÎÊsRGB®ÎébKGDÿÿÿ ½§“ pHYs  šœtIMEÚ ‚(Ú§´IDAThÞÍ›y”å¹î_íªÚóØ»Gº›nhšQ `D$¹*pôªqºÑ ˜˜£9®h¢9æ&9‘¬»2hrô3,oŒó=šhDÈQQÇÊÔMÓÐÝ»÷Ô{®=U}÷n9Ä«TLÞµú®ª]õ}O=ß;<ïW‚°sÎ9G;}Ù²Öá‘‘³ þ—Û½"‹ÙŠ¥‡cÐa·?ÂìÙ^{õÕ'¿yýõæòeË,ŽÁÊð9Þàodâ¯üÞ÷¾wâàààmCCC§æ t]§µ¥˲d&›Å®ë¢··¯ÏG{[ÛpsKË÷Ýn÷c7ýó?çŽæáX®À- Ü àÅ¿+¾ÿƒ´íÚ¹sЇÉd³rò¤IBÕ4ÌjM× ×Ö‹Fq{F“I2™ –iÒ:q"sæÌ‘¡š±uëÖÿiV«ÅÇ}tõ_\þ°ûWa™ç À‚ó,xxþ³@ù°£££­\µ Ÿß/^Ù´ »ÝŽ/`êÔ©ä²Y&¶¶ò¿.¹„eË–100À¯~ùKqèÐ!¹téR*ÕêÅ]ÝÝ÷}ؽ·€¦Â…Âã4 «páÐþ.–ÀÍkמ …ž3w.?¿õV^õUZZ[iniÁåra·Û"ÉpÑEQ[[Ëp$Â=Fã„ ²T* ·ÛÍŠ+®¹rÍšß|ýÏ´àA5GK(p©€ Ÿ%¶÷xü?þãB§ËõäË/¿Œªªbû¶m S(ˆŽŒ`Z–i²lùrÂá0ÿvÛmôõ÷sÆgÐÞÞ΄ÛíFÓ4Y,•κ|õê'Ÿyæ™èÕ,ø®€Åï{. ÕµðìZ°>³%ðûÇ×Î=ÿüæ~xåžzêu¿ßÿ{iYÊÕW_-*Õ*---x<FGG‰Çãô÷õÇÙ°n‹-âŠ5kØÝÕÅ-?û•j•Ó–.Å(M&…Y©ØÉä¦e¹Xû_Îù°¨;~þ³c€¦ªÿT(kjl\S0Œæ7’ÏçÙ¿?¹\ŽPM “'OÆét¢©ª,•JBQÚÚÛYµr%çœ{.ñxœ/¼€Y­2ó„(  …0MÓïp8œ÷Þ{ïʱ5~“€E²*Ÿ% ÔT:ý³P0øÎî®.„U§Óy¨ÏÇ‚¡ÐH)ŸW:§L騫¯ÿÂÜyóšò¹œÈåóx=9iòd¡¨*»E‹ñÀý÷óú믳xÉÎZ±‚Ôè(£££Ô„ꢭlšÓf;KÀ—>bLçOúLhoo¿Øåt¾½îÙgÕ³Ï>Û<ûì³Ó‹-Ê !L€—6mrïܵ«CUÕÓ5]?+¨ëŸ³,Ë[5MY0 tMÙ\Ž\.‡¢(lX¿ži3fÐ>i¯¿ö“&M’éL¦¤]u•ny~ÏGxe?p¡„ç;¿þÕ¯žzïŸwwïæg?ýé_\pÚ©§æíñL¦ë¾ßýîžJ¥ðz½7är¹oœ¼p!†a°aýzl6Õj•îînvíÜI¡P@( †aÓ4å[o¬8šAYpŽOü›%Bï·°ÏWR@ê‡7ß¼vÞܹmÃÃÃ+Ÿ~úivïÞ¢(èºN:&›Íâõz1MSJ)E¥Tj7ÂáKà:ÊØì¾$a½€Òß=ú¨­©±Ñáp8¼£££—y<žÓÞݳ‡—7mb4‘  ‘N§1 CšÕªè˜2…wwïÆãõb+—}ÕTjñ± lœOŽûƒ¿RJå;:=.×é§si©T:3\Sãë˜<EQdkK‹ˆG£$ Ê岬««Ó§Oèšællnö·¶¶Òìõ:ŒÑQÍ{lšgÜlPøÌ¡#m×®]ž+Ö¬¹ö`oï¯ìÇ%M3'µ·ÛC¡”RŠºº:1cÆ N^´ˆ·ÞzK& 1¥³³°`Á‚[ÃáðÁL&s’ßïgv:mkzä”Ç48 “¼»º>s|õ«_ÕW¯^}÷ðððù¯nÞ¬<ýôÓtL™"¿öµ¯‰eË– ¿ßeY¨šF&–ýýÂívÓÒÒrï¢Å‹ŸO§RÙy³gŸÕ0<ì«»å–0æ±z\\ a€üg @*•zº¿¿ÿ ›Í†ªª2‹‰‘hTô÷õqê©§2mÆ "‘;wìàà"“ɰxÉ’=_rÉí'ÌœÙ+„0SóçÿFù±cpð“Œq… ÿ<ú™¥‡×\sÍÍÓ¦M“'N”S¦L±¦OŸ.›››emm­ôûýÒãñÈ`0(}>ŸÔu]:kÖ¬YûþüüóGÐ7d ä'ý“ð„üˆüáS«¥”+WÆÓ©”â„Ïë!èï'’Ëå(—ÇÊüJ¥‚¢(444È)Y)¥OJI0 šN.¯TüS‡†”ûö¡”Ëh€b{†+¡¨ÀW«iø|>êgÎd‘¦qz.ÇÂþ~€#›åh¢‡5þ$ s\£Àüùó ¥œêv»…ÍfÃåtâöxpØí¸\.b±årùp‘$¥d``!ªª ¯×+EÁáp%›Í²/—ã‰D‚ך›ÉΜIÆãÁítâÍdŽˆ‰ìûì8® Ðu½YZ–ðù|Äb1ªÕ*áp¯ßO©T¢¶®ŸÏGµZ¥¯¯L&ôiÓˆÇã†A©TÂ4MEAJ‰¢(”Ëet]GZ»£Q’ãYcK8̵µœ‰àîïÿH(c,xZ@ú¸1`Á‚+3Ùlg¹\‰D‡ÝŽP¤eQ*•ðz½øƒAº»»èïÇáp‰D(—ËT*,Ë"“É`š&Ùl—Ë…aX–uMUéííe0f»®óJ0HcG? ízÖ¶㦠677o2;::ðy½465Q4 úúú°Ûíøý~4Mã„Ù³™;oÞa–(6Ùl–jµJ¥R!àr¹ð¸Ý(Š‚ªª¸\.œN'ÅR ·Û]×Éär¼60À×ùÁyçÑ=cÆ_c6î ‚Ç3 ¸Î;ÿüxCCƒ³k×.Á ÓgÌàÀþýK%Âá0#‘¦iR©TB°{÷n‰---L:•«Va †Aww7ý}}ŒŽŽâöxؾmv»J¥‚MUilh ¢k-­­Ô„B\èrqþæÍxc±bAYpÏqE¯»þú{K¥ÒêR±(ëD>ŸÇ2MFGGI¥R̘5‹Û¶Q©T0Ç#D©XdúŒ\yå•tN™Â¡áaö÷öòðCñ‡'ž@ACCÉd’B¡€ÍfCÓ´1ÅÙç#J¥˜:möR‰k#Nܵ ½X|ÿ ÿ |Y@â¸Ô¿½ë®ÿZ·nÝeÍÍÍî|.' 
†!,Ë¢®¾žx,†">¿ŸB±ˆešÔÕÕqúòå¬Z¹’Ó§³³«‹Ý]]Üß}üáÉ'1M“jµJ*•bÚôé¸Ýnl6•J…l6K:Æf³ÑØØ8ƤæfœÁ ë¥$çóÑY­âÈçdA›~[[gè¦ï~÷bÓ4Ée³R(ŠM¥¨¯«cwWBQ˜3guõõø|>V­\IãøÛ}yÓ&zè!¶nÙBWWŠ¢ŒùEáä“O¦­½x,†Q,280@.—Ãg@:¦¹¹™™³fáõxعs'm¹ÿ˜Ïsòž=G‚ð‚—ˆ—jðw÷ÜÓ³¯§§]Øl³Ë¥’œ1}ºˆD"ƒA ù<ùBŸÏǤövòùÏ¡C‡0 ƒT*E2™D³fÍâýûyÕåbR0ÈÄÿö ­z×~BüÕæè=÷Ý×þöÖ­Çb±Ïéº.ûûûÅÊ•+yíµ×˜5ksçÎåùçŸg×®]ŸÇW‘’ÉäaÇ™Íå8aöl¢‘ÝÙ,™)SDg¡Pÿ£LæÙŒJÑv,ß}×]‰¯¬^½±jšS¥”Ÿ›Í†®ëcIª’Îd(‹K%E¡T*áv¹Ðt]×±ÛíԄøÝîÃŽq¼CJI"#22‚eš8œN¤”T«Uü~?BÒ™ £É$³çÌaß¾}D„`dêÔ¦–ÚZõ×ýýŽ+>ð@ú;6[¬âvŸ¯44hH‰ÝnGÓ4„8].*• étšjµJ&“Áïó!…L&C¹R¡©© —ÓI±X$\[‹Ýn§®®ŸßÏÔ©S™ÐÜŒ%%‰DâpuY­T°Ùl8N ù<år™I“&ñöÖ­ìÏdéìœÿ»SO}ùÎ-[úŽ+¿Þ¶í’ð–-gôO™‚·£MUQUMÓ¨V«˜¦É¾}û°, ]Ó@µZÅår¡1Ö=N¥H$ ö÷£ª*áp˜½½¤R)š[ZèììdÚôé$âq²Ù,…BaÌ7d2Å"CCCtttp ·—ªiÒ‰(qMûÂ;==·W$̲à‡Z6[ïØ»—‘ ð75f€ËéDQjB!rù<ÉÑQ„(ã P„@Óu à¡¡}ûö1uÚ4yøa¾²z5;ÇÛj>Ÿ›¢ÐÙÙI:&_(P*•°Ûí8N„”+LÓ¤\.S.•èÇý7^ziÓK[¶O©\¦h‡›MQÈçó ƒššñ8ÉD‚¢aaùòåìÞ½›¢a ØlԆäR)œt»Þýûqº\x=¦MŸÎ—¿üeù\Žb©D.—#as¾ûýï?²þÙg“Ÿ:?Bï«éï'´~=…úzRÍ͇ýËå!R2::JÀïGÕ4l6Ò²0 Ó4É ´µµ!¥¤¥¥e (U¥¯¯ææft]§k×./YBµZå@o/×ûÛÔÕÖÒÓÓÃì9shooÇ’¤$Í*V29iWOÏ#Ÿ:ÿ×1¦Ñÿ¥ªR©àܼ™\&ƒ1s&¾PMÓðû|ƒAìv;…q —J%t]Ç(©Žk ³çÌ¡ïàAæÏŸÏà¡Cc‰ÓÈsçÎEJÉ‹/½Äi§Fcc#ËN?h,Æc=Æ¡ÁA¢##”Æ%·ÉØl6T»½íóË–mߺeK÷§€„“$Ü, ðÒR¥‚o×."±ÑÎN|~?‡§Ã×ëÅåt‚”Ä 6› !uuu†ÁĶ6 ࿯ššt»ǃaìëîföìÙìݳ‡··nåÅKô###tuuq°¯$ |€Í,æ_rÙe¯lܸqä ÇRæë>ª¿/L“ðΑC3fàóû±«ÉÁ`†ÆF|^/¯—X4:¦*•ËÄb1*• ƒ‡Pl6&45Q,Éf³ ›¢Çy{ëVÒãšã{yÆ{÷$ãt¹¨©¯¯Éf³«®ºêª­ëÖ­ëûDÜ ‹,ø—{ûï7ÿž=dìv†Z[ …B‡•Û=VN ˜Õ*ºÝN<Çát¢i–i’Ëç‰E£‡õÃ7nä‹.bó+¯àóùÐuR©tX¨Õu}¬nYx<t]'™L2<4D]}½/•J]qñÅ¿õÜsÏõ|¾ œy,þ¢æÍ7ÙרHÊãÁár¡kc{ @€ææf›` @±X¤R©P©V@&“Áãõ2šL2sÖ,jëêp»\èv;—_~9§œr ÁPˆr©D¹\&ð^Jn·Û1 ¡( ËJ¥"TMûò—Î?Ó /¼pð˜°Ä‚ ðkÈœøÒKÄ3ëêȔ˘¦‰M<–i¢ª*‡B¡0¦k…|—ËE±TÂ9ž †ÁÀÀ'Nä¼sÏ¥¥¥…@0H¥Rahh,Z¼˜ÎÎNÜn7n·›pm-š¦‰T*%£##"‹}±§§ç·k×®-5„ߟ`ï^}W¶}ûˆxŸ jš¥%K–Ô~qùòìGÂŽ¿Ø"#Ǽè£&/Ç"ZðŒëTxEÀáôò7wÞyêî®®™Í­­J&&P__Ï”)SÀÞ={¤ÝnƒƒƒÛ·mk»|ÍšËÞxýõ5===׌ .G3‘à> WáeÌi®blÉ_ ïÓ$\Ä8∵¿J> I9¶wÿñ·½nÉÃvëÏÞ”J§ÏŽ _[©Vg´·µQ5MZÆUû˜\&G"á¯ówlß^˜pÿýÕOØéÝ ì–p¿ËÇ#Èßû<ïˆ9u¸xGŒøâxÜqÑ»À³ã4õHšÝñË_Ný§o|£çÇ?ùÉ¢D<~¢ª—†Q»dÉz{{:tˆB¡ÐÓÜÜ|{©RyãÖŸþôon8 F8Ó2V«L=âÜ~.GÆ}y ^“𬠞§ÊDzû|ð–Í›7ßh×44]§P(ü¬T*½uÏÝwào`E˜¦Â ¬°`±§„[©ŒÅÕ.sÌ¡œ/?F²ñ~»ýöÛ?Õ׿þÔo¾ÙZû£ÝðǧžüX Brl§Ù&¼ñÿ¾‡cï èƒIEND®B`‚doclifter-2.11/doclifter0000775000175000017500000136062712152465736013511 0ustar esresr#!/usr/bin/env python r""" doclifter: translate man/mdoc/ms/me/mm sources to DocBook. By Eric S. Raymond, copyright 2002, 2006, 2007. Released as open source under the BSD license. This comment is addressed to you if you want to add support for another macro package to doclifter. Or if you have encountered a bug in doclifter and need to understand the code in doclifter in order to fix it. Or if you just want to understand how it works. This code has only one piece of global state: globalhints. Two other globals, stdout and stderr, don't retain state. A global prettyprinter instance named `pretty' may be created if you're debugging. Internally, doclifter consists mainly of a framework class called DocLifter. This class is instantiated and told to do its stuff by a routine called transfile, which handles all I/O to disk and gives doclifter its cc-like invocation protocol. Underneath, it passes TroffInterpreter a string consisting of the entire text of the file to be translated and accepts a translated string back. TroffInterpreter provides I/O and other basic services for a stack of request interpreters. Interpreters get added to the stack when TroffInterpreter recognizes certain patterns in the input; see the table interpreter_dispatch for details. If a string pattern added to this table is length 2, TroffInterpreter will assume it is a request name and check to make sure that it's not a macro. 
The interpreter stack always includes TroffInterpreter at the bottom. This request interpreter handles the small set of troff requests that we translate, including .so, .nf, .fi, .if, etc. It also handles macro and string expansion. Note that commands are looked up in reverse order of class activation, e.g most rescent extension set first. This means that definitions in a later class override definitions in earlier ones. Each request interpreter is a class that provides methods and members to be used by the framework. Here they are: name The name of the macro set exclusive Whether this is a "major" macro set like man, mdoc, mm, ms, or me -- as opposed to a minor one like pod2man or TkMan. Whichever major macro set is triggered earliest in the file gets a lock on it; trigger patterns from other exclusive macros are subsequently ignored. toptag The top-level tag in the type of DocBook that this request interpreter generates. The top tag for the generated XML will be the top tag of the only exclusive macro set in the stack, if there is one; otherwise it will be the top tag of the most recently added interpreter. ignore_set Tags to ignore. List here any presentation-level tags that don't have structural implications. They will be silently discarded. Note: there is a potential subtle gotcha in the handling of ignore sets. The code presently assumes that no tag in any interpreter's ignore set is handled by any other interpreter. complain_set Tags to complain about. Put here things that can't be translated out of presentation level but that might have structural meaning (such as indentation changes). The user will be warned on stderr when these come up. Otherwise they're ignored. parabreak_set The set of tags that forces a new paragraph without changing the document section. Used to recognize the end of lists. sectionbreak_set The set of tags that forces a new document section. Things that are going to translate to a DocBook sect, refsect, or section tag should go here. listbreak_set The set of tags that forces an end to a list section. Normally includes everything in the sectionbreak_set. translations Special-character to ISO literal mappings. These are applied late in the translation, *after* string and macro evaluation. It's also useful to know that your request interpreter can call the function declare_body_start() to tell the framework class where the body of the document starts (as opposed to the preamble full of troff requests). This infornation is used to restrict the scope of character translations. interpret The request interpreter. Called on every input line that begins with a command character, that is . or ' not followed by another ' on the same line. This method needs to be careful about troff continuation (\c) characters. If you add trailing markup to a line, or entirely replace the line, be sure to check for trailing \c first, remove it if present, and paste it back on the end. preprocess, postprocess Pre-processing and postprocessing hooks. Each takes a string (assumed to be the entire file text) and returns a string. reductions: A list of pairs of macro names. In each pair, the first is to be replaced by the second if this macro set is active and the definition of the first contains the second. This member is useful for replacing stereotyped wrapper macros with standard constructs that the translator knows how to handle. Most frequent case: lots of man page authors define a .Pp macro that does various funky things in troff but just expands to .PP in nroff. 
If we replace this with .PP various nasty parsing situations suddenly don't break. The easiest way to write a full-blown new request interpreter is to take an existing one and mutate it. If the macro package you are trying to support merely adds a few tags to an existing one, consider writing an interpreter for just those tags and adding it to the stack (this is the way the Pod2ManInterpreter code relates to ManInterpreter). Warning: much of this code is grubby. Alas, the grubbiness is intrinsic, because the troff request language is grubby. """ import sys, os, glob, re, string, exceptions, tempfile, time, pprint, commands version = "2.11" # This is a speed hack recommended by Armin Rigo. It cuts runtime by about 33% # and makes it possible for psyco 1.2 to reduce runtime another 33%. re_cache = {} def re_compile(st, flags=0): try: return re_cache[st] except KeyError: r = re_cache[st] = re.compile(st, flags) return r # In order: Dutch, English/German, French, Italian, Norwegian/Danish, Polish, # Spanish, Swedish. name_synonyms = re.compile("^(naam|name|nom|nome|navn|nazwa|nombre|namn)$", re.I) # How to detect synopses synopsis_label = re.compile("SYNOPSIS$", re.I) synopsis_header = re.compile(r'\.S[Hh]\s*"?(?:SYNOPSIS)', re.I) # Qt part descriptions. It's OK to see these in function synopses, we just # turn them into an info section. qt_headers = ("Public Members", "Public Slots", "Signals", "Static Public Members", "Properties", "Protected Members",) # Used to distinguish first-level section headers from second-level ones # when the Qt grotty hack is enabled. caps_header = re.compile("^[A-Z ]*$") # These have to be messed with by the Qt grotty hack. qt_invert = ("Property Documentation", "Member Type Documentation") blankline = re.compile(r"^\s*$") # Start tag on a line by itself endtag = re.compile("<[^>]*>$") # Used in C syntax recognition c_declarators = ("void", "char", "short", "int", "long", "float", "double", "signed", "unsigned", "typedef", "struct", "union", "enum", "const", "volatile", "inline", "restricted", # C9X "virtual",) # C++ # Used to strip headers off generated HTML documents. xmlheader = re.compile(r"<\?.*\?>\n") doctype = re.compile(r"<\!DOCTYPE[^>]*\>\n") # These patterns are applied *after* special-character translation # Match an RFC822 email address, possibly with surrounding <>. # This is the right thing because the XSL stylesheets surround # content with <> on output. email_re = re.compile(r"\b(?:<)?(?P[-\w_.]+@[-\w_.]+)(?:>)?\b") # Match an URL. This pattern is carefully constructed not to eat # a following period if (as is often the case) it occurs at the # end of a sentence. url_re=re.compile(r"(?P\b(http|ftp|telnet|mailto)://[-_%\w/&;.~]+[-_%\w/&;])") # Match a xmlns URL in the top level tag, so that the url_re does not try to ulink-ize it. xmlns_re=re.compile(r"\w xmlns='http://docbook.org/ns/docbook'") # Match a troff highlight troff_highlight = re.compile(r"(\\[fF]\([A-Z][A-Z])|(\\f\[[A-Z]*\])|(\\[fF][A-Z0-9])|(\\F\[\])") troff_highlight_stripper = re.compile(r"^\.[BI] ") # Match a glue token with all preceding and following whitespace hotglue = re.compile(r"\s*@GLUE@\s*") cleantag = re.compile(r"<\1>") # Match an identifier token in C or Python id_re = re.compile("^[_a-zA-Z][_a-zA-Z0-9]*$") # List how troff specials that can appear as list tags map into # DocBook mark types. According to Norm Walsh's DSSL and XSL # stylesheets, both toolchains have two styles available; bullet and # box. 
An older version of the DocBook documentation said that in # itemizedlists the attributes can be the three names HTML supports: "disc", # "circle", and "square", with "bullet" as a synonym for "disc" and # "box" as a synonym for "square". We map dash to box here for consistency # with the -dash/-bullet distinction in mdoc, where -dash can only # reasonably be mapped to box rather than disc. ip_tag_mapping = { r"\(bu":"bullet", r"\(sq":"box", "*" : "bullet", "-" : "box", } # Add this to the V4 preamble when we have MathML elements mathml_entities = ''' %mathml; ''' # Add this to the V5 preamble when we have entities allent = ''' %allent; ]>''' # Convert empty man pages generated by POD, but be rude about it. rudeness = """This empty page was brought to you by brain damage somewhere in POD, the Perl build system, or the Perl maintainers' release procedures.\ """ empty = """\ Description """ + rudeness + """ """ # Verbosity thresholds general_verbosity = "g" # More details on warnings section_verbosity = "s" # Show section pushes and pops classify_verbosity = "c" # Show section classification details parse_verbosity = "p" # Show synopsis parse details macro_verbosity = "m" # Show expression evaluation details highlight_verbosity = 'h' # Show highlight resolution details io_verbosity = "i" # Show low-level I/O interpreter_verbosity = "z" # Show low-level interpreter checks bsd_verbosity = 'b' # BSD macroexpansion tokenizer_verbosity = 'x' # Tokenizer verbosity timing_verbosity = 't' # Execution profiling supersub_verbosity = 'u' # Super/subscript recognition velocity. def deemphasize(st): "Throw out highlighting info from a string." return troff_highlight.sub("", st) def is_command(line): # This works around a common bug -- string-enclosing ' at the left margin return len(line) > 1 and \ (line[0] == TroffInterpreter.ctrl or (line[0] == TroffInterpreter.ctrl_nobreak and line[1:].find(TroffInterpreter.ctrl_nobreak) == -1)) def is_comment(line): # The malformed crap people write as troff comments is amazing... line = line.replace(" ", "").replace("\t", "") return line == TroffInterpreter.ctrl or line == TroffInterpreter.ctrl_nobreak or line[:3] in (r'.\"', r'/\"', r'./"', r".\'", '\'\\"', r'\'\"', r'\".', r"...", r"'''", r"\!.") or line[:2] in (r'."', r".'", r'\"', r"'#", r"\#") or line[:4] in (r'.\\"', r"'.\"") def match_command(line, tag): # Cope with the possibility of spaces after the dot if not line or line[0] not in (TroffInterpreter.ctrl, TroffInterpreter.ctrl_nobreak): return False tokens = line[1:].strip().split() return tokens and tokens[0] == tag def quoteargs(tokens): "Quote argument tokens so that re-parsing them won't produce surprises." if len(tokens) == 0: return "" elif len(tokens) == 1: return tokens[0] else: return tokens[0] + ' "' + '" "'.join(map(lambda x: x.replace('"', '""'), tokens[1:])) + '"' #def untagged(pattern): # "Transform the pattern to guarantee that it won't match marked-up text." # # Warning! Only really works with fixed-length patterns. # return re_compile("(?)" + pattern.pattern + "(?! -1 and istr[last_font_escape+2] not in "R": istr += r"\fR" istr = re_compile(r"\f[^P]\fR$").sub(r"\fR", istr) last_font_escape = istr.rfind(r'\F') if last_font_escape > -1 and istr[last_font_escape+2:last_font_escape+4] != "[]": istr += r"\f[]" return istr def get_xml_char(istr): "Extract a leading character or XML escape from the string." 
if len(istr) == 0: return "" elif istr[0] != "&": return istr[0] else: take = 1 while istr[take] != ';': take += 1 return istr[:take+1] def make_comment(istr): if istr.startswith("."): istr = istr[1:] istr = istr.replace(r'\"', "").replace(r'\\"', "").replace(r'\(co', "(C)") istr = istr.strip() return "" def lineparse(line): "Parse arguments of a dot macro." if not is_command(line): return None #stderr.write("About to parse: " + line + "\n") tokens = [line[0]] state = 'dot' # Start after the dot in dot state for c in line[1:]: if state == 'dot': # accumulating a token if c in (" ", "\t"): continue else: tokens[-1] += c state = 'token' elif state == 'token': # accumulating a token if c in (" ", "\t"): state = 'ws' elif c == '\\': tokens[-1] += '\\' state = 'tokencont' else: tokens[-1] += c elif state == 'tokencont': # accumulating a token if c in (" ", "\t", "\n"): tokens[-1] = tokens[-1][:-1] tokens[-1] += c state = 'token' elif state == 'ws': # in whitespace if c in (" ", "\t"): continue elif c == '"': tokens.append('"') state = 'string' elif c == '\\': state = 'leader?' else: tokens.append(c) state = 'token' elif state == 'string': # in string tokens[-1] += c if c == '"': state = 'stringend' elif state == 'stringend': # just saw end-of-string, what now? if c == '"': state = 'string' elif c in (" ", "\t", "\n"): state = 'ws' elif c == '\\': state = 'leader?' else: state = 'token' tokens.append(c) elif state == 'leader?': # possible comment leader if c == '"': break elif c in (" ", "\t", "\n"): tokens.append(c) state = 'token' else: tokens.append("\\" + c) state = 'token' # Special case: turn trailing brackets into an argument if len(tokens) == 1: trailer = tokens[0][3:5] if trailer in (r"\{", r"\}"): tokens[0] = tokens[0][:3] tokens.append(trailer) return tokens def stripquotes(arg): "Perform quote-stripping appropriate for macros and .ds commands." if type(arg) == type([]): return map(stripquotes, arg) else: if arg and arg[0] == '"': arg = arg[1:] if arg and arg[-1] == '"': arg = arg[:-1] return arg class LiftException(exceptions.Exception): def __init__(self, message, retval=1): self.message = message self.retval = retval class Dropout(exceptions.Exception): pass class SemanticHintsRegistry: "Represent all the semantic information gathered during a run." def __init__(self): self.dictionary = {} def post(self, token, ptype): "Post an association of a string with a semantic markup type." #stdout.write("Markup %s as %s\n" % (token, ptype)) self.dictionary[token] = ptype def get(self, token): return self.dictionary.get(token) def apply(self, text): "Apply all known hints to lift tokens in a text string." 
# stderr.write("Marked tokens:" + `self.dictionary` + "\n") for (token, tag) in self.dictionary.items(): with_hi = r"(%s)" % token #stdout.write("marking %s as %s via %s\n" % (token, tag, with_hi)) try: ender = tag.split()[0] # discard attributes text = re_compile(with_hi).sub(r"<%s>\1"%(tag,ender),text) text = re_compile(r"\b("+token+")\b").sub(r"<%s>\1" % (tag, ender), text) except re.sre_compile.error: pass return text def read(self, rinput): "Read in a hints string or file as dumped by __str__" if hasattr(rinput, "read"): fp = open(rinput) data = fp.readlines() fp.close() else: data = rinput.split('\n') for line in data: if line.startswith('.\\" | '): # Someday we'll have more declarations try: (mark, token, as_word, markup) = line[5:].split() if mark != "mark" or as_word != "as": continue self.post(token, markup) except ValueError: continue def __repr__(self): "Dump a representation of hint info." out = '.\\" Begin doclifter hints.\n' for (token, tag) in self.dictionary.items(): out += '.\\" | mark %s as %s\n' % (token, tag) out += '.\\" End doclifter hints.\n' return out class Frame: "Frame state for the list-markup stack." def __init__(self, command, ftype): self.command = command self.type = ftype self.count = 0 def __repr__(self): return "" class DocLifter: "DocBook translation of generic troff macros." # In each tuple, the first element is an emphasis remap attribute. # The second element is a regexp to match to the tag content. # If the regexp matches, the bracketing emphasis tags are replaced # with the semantic tag in the third column. lift_highlights = map(lambda x: (re_compile(r"(%s)" % (x[0], x[1])), x[2]), ( ("SM", r"[A-Z.]*", "acronym"), # Historical -- SM is rare ("SM", r"[A-Z]+_[A-Z_]+", "envar"), # In bison.1, cvs.1 ("[BI]",r"-[^<]+", "option"), # likely command option man(7) ("[BI]",r"[0-9.]+", "literal"), # literal value ("[BI]",r"[a-zA-Z0-9.]+((\s| )--?[^<]+)+", "userinput"), # user command ("[BI]",r"\.[a-zA-Z][^<]*", "markup"), # roff markup ("[BI]",r"/[^<]+", "filename"), # Marked filenames ("[BI]",r"~/[^<]*", "filename"), # Home directory filenames ("[BI]",email_re.pattern,"email"), # email addresses ("[BI]",r"SIG[A-Z]+", "constant"), # signal ("[BI]",r"errno", "varname"), # variable ("[BI]",r"[a-z_]*_t", "type"), ("[BI]",r"[a-z_]+(?:\(\))", "function"), # Error codes. This is the Linux set. 
("[BI]",r"E2BIG", "errorcode"), ("[BI]",r"EACCES", "errorcode"), ("[BI]",r"EAGAIN", "errorcode"), ("[BI]",r"EBADF", "errorcode"), ("[BI]",r"EBADMSG", "errorcode"), ("[BI]",r"EBUSY", "errorcode"), ("[BI]",r"ECANCELED", "errorcode"), ("[BI]",r"ECHILD", "errorcode"), ("[BI]",r"EDEADLK", "errorcode"), ("[BI]",r"EDOM", "errorcode"), ("[BI]",r"EEXIST", "errorcode"), ("[BI]",r"EFAULT", "errorcode"), ("[BI]",r"EFBIG", "errorcode"), ("[BI]",r"EINPROGRESS", "errorcode"), ("[BI]",r"EINTR", "errorcode"), ("[BI]",r"EINVAL", "errorcode"), ("[BI]",r"EIO", "errorcode"), ("[BI]",r"EISDIR", "errorcode"), ("[BI]",r"EMFILE", "errorcode"), ("[BI]",r"EMLINK", "errorcode"), ("[BI]",r"EMSGSIZE", "errorcode"), ("[BI]",r"ENAMETOOLONG","errorcode"), ("[BI]",r"ENFILE", "errorcode"), ("[BI]",r"ENODEV", "errorcode"), ("[BI]",r"ENOENT", "errorcode"), ("[BI]",r"ENOEXEC", "errorcode"), ("[BI]",r"ENOLCK", "errorcode"), ("[BI]",r"ENOMEM", "errorcode"), ("[BI]",r"ENOSPC", "errorcode"), ("[BI]",r"ENOSYS", "errorcode"), ("[BI]",r"ENOTDIR", "errorcode"), ("[BI]",r"ENOTEMPTY", "errorcode"), ("[BI]",r"ENOTSUP", "errorcode"), ("[BI]",r"ENOTTY", "errorcode"), ("[BI]",r"ENXIO", "errorcode"), ("[BI]",r"EPERM", "errorcode"), ("[BI]",r"EPIPE", "errorcode"), ("[BI]",r"ERANGE", "errorcode"), ("[BI]",r"EROFS", "errorcode"), ("[BI]",r"ESPIPE", "errorcode"), ("[BI]",r"ESRCH", "errorcode"), ("[BI]",r"ETIMEDOUT", "errorcode"), ("[BI]",r"EXDEV", "errorcode"), # Standard environment variables from environ(5). ("[BI]","USER", "envar"), ("[BI]","LOGNAME", "envar"), ("[BI]","HOME", "envar"), ("[BI]","LANG", "envar"), ("[BI]","PATH", "envar"), ("[BI]","PWD", "envar"), ("[BI]","SHELL", "envar"), ("[BI]","TERM", "envar"), ("[BI]","PAGER", "envar"), ("[BI]","EDITOR", "envar"), ("[BI]","VISUAL", "envar"), ("[BI]","BROWSER", "envar"), # Common library environment variables, also from environ(5) ("[BI]","LANG", "envar"), ("[BI]","LANGUAGE", "envar"), ("[BI]","NLSPATH", "envar"), ("[BI]","LOCPATH", "envar"), ("[BI]","LC_ALL", "envar"), ("[BI]","LC_MESSAGES", "envar"), ("[BI]","TMPDIR", "envar"), ("[BI]","LD_LIBRARY_PATH", "envar"), ("[BI]","LD_PRELOAD", "envar"), ("[BI]","POSIXLY_CORRECT", "envar"), ("[BI]","HOSTALIASES", "envar"), ("[BI]","TZ", "envar"), ("[BI]","TZDIR", "envar"), ("[BI]","TERMCAP", "envar"), ("[BI]","COLUMNS", "envar"), ("[BI]","LINES", "envar"), ("[BI]","PRINTER", "envar"), ("[BI]","LPDEST", "envar"), )) post_translation_patterns = ( # man(7)-style man-page references (re.compile(r"([^ ]+)(?:&zerosp;| )?\(([0-9]+[A-Za-z]?)\)"), r"\1\2"), # Here's where we fold all those continuation lines. (re.compile(r"\\c"), "\n"), (re.compile("\\\c\n"), ""), # Interpret attempts to fake up double quotes. Should be safe as # these never occur in program listings. (re.compile("``([^`']+)''"), r"“\1”"), ) post_lift_patterns = ( # Find a highlight directly after an "), ("\\\\m\[([a-z]+)\]", r""), )) for (regexp, inline) in TroffInterpreter.prefix_lifts + color_lifts: text = regexp.sub(inline, text) # And we may need to emit some compatibility warnings if self.source.portability: if self.nonportable_features: self.nonportable_features = list(set(self.nonportable_features)) self.source.filewarn("portability warning: nonportable requests '%s' seen.\n" % ", ".join(self.nonportable_features)) if self.source.portability >= 2: if self.longnames: self.source.filewarn("portability warning: groff-style long macro names '%s' seen." 
% ", ".join(self.longnames)) if self.groff_features: self.groff_features = list(set(self.groff_features)) self.source.filewarn( "portability warning: groff extension%s '%s'." % \ (("", "s")[len(self.groff_features) > 0], ", ".join(self.groff_features))) return text # # Some formatting functions are common across more than one macro set. # def skip_ignorables(source): "Skip blank lines and ignorable commands." while source.lines: line = source.popline() if line == TroffInterpreter.ctrl + "end": source.pushline(TroffInterpreter.ctrl + "end") break elif line == None: break elif line in ("", TroffInterpreter.ctrl, TroffInterpreter.ctrl_nobreak): # Skip blank or null lines continue elif source.paragraph_break(line): # Skip ordinary paragraphs continue else: if not is_command(line): # Non-blank text line source.pushline(line) break else: tokens = lineparse(line) if source.ignorable(tokens[0]): continue source.pushline(" ".join(tokens)) break def gather_lines(source): "Gather text lines until we hit a command." res = [] while source.lines: line = source.popline() if is_command(line) and line[1] in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": source.pushline(line) break if not (is_command(line) and source.ignorable(line)): res.append(line) return res def gather_item(source, tag=None): "Gather item, emitting opening and closing listitem tags." if section_verbosity in source.verbose: source.notify("gather_item(%s)\n" % tag) if tag: source.emit("<" + tag + ">") source.need_paragraph() savesect = [] outlines = [] # Discard commands that generate nothing skip_ignorables(source) # Now gather the list item proper source.listitem = True if section_verbosity in source.verbose: stderr.write("gathering list item\n") while source.lines: line = source.popline() # Maybe we're looking at a commented-out entry if line == TroffInterpreter.ctrl + "ig": savesect.append(TroffInterpreter.ctrl + "ig") while True: line = source.popline() savesect.append(line) if line == TroffInterpreter.ctrl + ".": break continue elif line is None: break elif line.startswith(TroffInterpreter.ctrl + "blank"): # The point is not to end the list on these. savesect.append(TroffInterpreter.ctrl + "blank") elif source.section_break(line): # Push back any blank lines before the section break. # This avoids generating some spurious paragraph() # calls that can litter the output with extra close tags. while savesect and blankline.match(savesect[-1]): source.pushline(savesect[-1]) savesect.pop() source.pushline(line) break elif source.paragraph_break(line): source.pushline(line) break else: savesect.append(line) if interpreter_verbosity in source.verbose: source.notify("interpreting savesect: " + `savesect`) source.interpret_block(savesect, outlines) if interpreter_verbosity in source.verbose: source.notify("interpretation of savesect complete\n") if filter(lambda x: not not x and x[:4] != "") source.listitem = False source.end_paragraph(label="gather_item") if tag: source.emit(r"") if section_verbosity in source.verbose: source.notify("gather_item(%s)\n" % tag) def gather_simplelist(cmd, source): "Gather listitems, terminate when you see a dot command." while len(source.lines): line = source.popline() if not line.startswith(cmd): source.pushline(line) break else: gather_item(source, "listitem") def gather_itemizedlist(cmd, source, bullet): "Translate to bullet-list markup -- used in both man and me macros." source.emit("" % bullet) gather_simplelist(cmd, source) source.emit("\n") def gather_orderedlist(cmd, source, bullet): "Translate to numbered-list markup." 
source.emit("" % bullet) gather_simplelist(cmd, source) source.emit("\n") def parse_name_section(nameline): "Parse a NAME -- description line." nameline = deemphasize(nameline) nameline = nameline.replace("\t", r' ') nameline = nameline.replace(r" \-\- ", r' \- ') nameline = nameline.replace(" - ", r' \- ') nameline = nameline.replace(r" \(hy ", r' \- ') # Apparent pod2man breakage... nameline = nameline.replace(r"&zerosp;-", r"\-") if nameline.find(r" \- ") == -1: nameline = nameline.replace(r" \(em ", r' \- ') nameline = nameline.replace(r" — ", r' \- ') # SDL pages make this kluge necessary nameline = nameline.replace("--", r' \- ') nameline = nameline.replace(r"\-", r" \- ") return nameline.split(r' \- ') # # Display-parsing machinery. # class ParseNode: def __init__(self, ntype, token=None, choice="plain", repeat=0): self.type = ntype self.token = token self.choice = choice self.righthand = None self.repeat = repeat self.glue = None self.children = [] def __repr__(self): if self.type == "option": if self.righthand: return "%s=%s" % (self.token, self.righthand) else: return self.token + self.glue elif self.type == "replaceable": return "%s" % (self.token) elif self.type in ("arg", "group"): pre = "<%s" % self.type if self.choice: pre += " choice='%s'" % self.choice if self.repeat: pre += " rep='repeat'" pre += ">" post = "" % self.type res = "" for child in self.children: res += `child` return pre + res + post elif self.type == "@GLUE@": return "@GLUE@" elif self.type == "redirect": return "" + self.token + "" elif self.type == "sbr": return "" elif self.type == "\n": return "" else: res = "" for child in self.children: res += `child` return ("<%s>" % self.type) + res + ("" % self.type) def is_file_or_command_name(tok): # Yes, some legitimate commands begin with digits; # 411toppm is a good example. if not tok: return None else: return tok[0] in string.letters+"/" or (tok[0] in string.digits and tok[-1] in string.letters) def detroff(ln): # Remove markup generated by the Mdoc document macros. It may seem # a bit screwy to generate this stuff just to throw it away, but # we actually want these expansions everywhere outside of a synopsis. ln = ln.replace("", "").replace("", "") ln = ln.replace("", "").replace("", "") ln = ln.replace("", "").replace("", "") ln = ln.replace("", "") ln = ln.replace("", "") ln = re.sub(r"", "") # Some man pages (like afmtodit.1) run options together with their # following arguments together on the man page, with the boundary # marked by a highlight change. Replace these with a glue token so # there will be a parseable boundary there. ln=DisplayParser.old_style_option_glue.sub(r"\1 @GLUE@ \3",ln) # We have now extracted all the semantic information we can from # highlight boundaries. ln = deemphasize(ln) # Throw out the entity results of translating some confusing troff # characters. Yes, some man pages (notably several associated with # nmh) throw soft hyphens in there for no obvious reason. ln = ln.replace(" ","").replace(" ","").replace("&zerosp;","") ln = ln.replace(" "," ").replace("­", "").replace("\\", "") ln = ln.replace(r"-^-", "--").replace("—", "--") return ln class LineTokenizer: "Make a collection of lines available either as lines or tokens." def __init__(self, lines, verbose=False): self.lines = lines self.verbose = verbose self.pretokenizer = None self.token_index = 0 self.lookahead = [] self.lookbehind = [] self.savedlines = [] self.mark = 0 self.tokenize() def popline(self): "Grab the next line and make it the token buffer." 
        if not self.lines:
            if self.verbose:
                stdout.write("popline: returns None\n")
            return None
        else:
            if self.verbose:
                stdout.write("popline: starts with: %s\n" % self)
            res = self.lines[0]
            self.savedlines.append(self.lines.pop(0))
            self.lookahead = []
            if self.lines:
                self.tokenize(self.pretokenizer)
            if self.verbose:
                stdout.write("popline: returns: %s %s\n" % (`res`, self))
            return res
    def pushline(self, line):
        "Replace the token buffer with the current line."
        self.lines = [line] + self.lines
        self.tokenize(self.pretokenizer)
        if self.verbose:
            stdout.write("pushline: leaves: %s\n" % self)
    def peekline(self):
        "Return the token buffer"
        if not self.lines:
            return None
        else:
            return self.lines[0]
    def tokenize(self, new_pretokenizer=None):
        "Split a line on whitespace, but preserve \n as a token."
        if self.verbose:
            stdout.write("tokenize: %s\n" % (new_pretokenizer,))
        self.pretokenizer = new_pretokenizer
        if self.lines:
            if self.pretokenizer:
                line = self.pretokenizer(self.lines[0])
            else:
                line = self.lines[0]
            self.lookahead = line.strip().split()
            if line.endswith('\n'):
                self.lookahead.append('\n')
            if self.verbose:
                stdout.write("tokenize: split %s to get %s\n"%(line,self))
    def token_pop(self, count=1):
        "Get a token."
        res = self.token_peek(count)
        self.lookbehind += self.lookahead[:count]
        self.lookahead = self.lookahead[count:]
        self.token_index += count
        if self.verbose:
            stdout.write("token_pop: returns %s, from %s\n" % (`res`, self))
        return res
    def token_push(self, tok):
        "Put back a token."
        if self.verbose:
            stdout.write("token_push: %s, to %s\n" % (tok, self))
        if not self.lines:
            self.lines = [tok]
        elif not self.lookahead:
            self.lines = [tok] + self.lines
        self.lookahead = [tok] + self.lookahead
        if self.verbose:
            stdout.write("token_push: ends with %s\n" % self)
    def token_peek(self, count=1):
        "Peek at the next token. The count argument can only index into the next line."
        if not self.lookahead and not self.lines:
            return None
        if self.verbose:
            stdout.write("token_peek: I see " + `self` + '\n')
        while len(self.lookahead) == 0:
            if not self.lines:
                if self.verbose:
                    stdout.write("token_peek: I return None: "+`self`+'\n')
                return None
            self.popline()
        if self.verbose:
            stdout.write("token_peek: I return %s from %s\n" % (`self.lookahead[count-1]`, self))
        return self.lookahead[count-1]
    def checkpoint(self):
        "Restart saving of lines from this point."
        self.savedlines = []
        if self.verbose:
            stdout.write("checkpoint: done\n")
    def unroll(self):
        "Restore all saved lines, used to undo parsing effects on error."
        self.lines = self.savedlines + self.lines
        self.tokenize(self.pretokenizer)
        if self.verbose:
            stdout.write("unroll: restores to %s\n" % (self))
    def __str__(self):
        "Display the state of the object."
        return "" % (self.lookahead, pretty.pformat(self.lines[:5]))
    __repr__ = __str__
    def text(self):
        return "".join(self.lines)

class FunctionSynopsisParser:
    "Consume a function synopsis and return markup."
    # Candidate lines for FuncSynopsisInfo
    language_lines = (
        (re_compile(r"^\s*#\s*(define|undef|include|if\s|ifn?def|endif|extern)"), "C"),
        (re_compile(r"^\s*typedef.*;$"), "C"),
        (re_compile(r"^\s*import\s"), "Python"),
        (re_compile(r"^\s*use\s.*;"), "Perl"),
        (re_compile(r"#\s*perl"), "Perl"),
        )
    # These patterns identify lines that are probably code
    language_fragments = (
        # This is looking for the stuff that one finds around the left
        # paren of a C declaration. This is something we're quite unlikely
        # to see in running text.
(re_compile(r"[a-z][a-z][a-z]\([_a-zA-Z][_a-zA-Z0-9]+[, ]"), "C"), # Look for lines led with C declarations (re_compile(r"^\s*(int|char|long)\s"), "C"), # Someday, use these #(re_compile(r"^\s*def\s"), "Python"), #(re_compile(r"^\s*class\s"), "Python"), ) token_pairs = ( (re_compile(r"^\s*/\*"), re_compile(r"\*/$"), "C","C comment"), # typedef/struct/union end only on ^} because they can have {} inside (re_compile(r"^\s*typedef.*{$"), re_compile(r"^}"), "C","C typedef"), (re_compile(r"^\s*struct.*{$"), re_compile(r"^}"), "C","C struct"), (re_compile(r"^\s*union.*{$"), re_compile(r"^}"), "C","C union"), # With enum we can be a bit more relaxed (re_compile(r"^\s*enum\b"), re_compile(r"};?"), "C","C enum"), (re_compile(r"^\s*extern\b"), re_compile(r";$"), "C","C extern"), ) def __init__(self, io, source): self.io = io self.source = source self.output = "" self.language = None self.error = None self.seen_ansi = False # Shortcut: assume | and ') (' and ] [ can never occur in a function # synopsis (middle two filters out some Perl code examples). # Make an exception for || as this never occurs in those but may mean # there is code for a disjunction of feature macros, as in logf(3). # Look for these and return immediately if we find them. if filter(lambda x: ("||" not in x and "|" in x) or "('" in x or "')" in x or "] [" in x, self.io.lines): if classify_verbosity in self.source.verbose: self.source.notify("can't be a function synopsis, contains | or '] ['") self.error = "" return # Shortcut: to be parseable C, headers must contain (. # Command synopses generally have neither. # (We used to test for ; but XML entity expansions messed that up.) if not self.io.lines[0].startswith("#include"): if not filter(lambda x: "(" in x, self.io.lines): if classify_verbosity in self.source.verbose: self.source.notify("can't be a function synopsis, does not contain (") self.error = "" return # Otherwise time for a normal parse self.io.tokenize(self.__pretokenizer) try: try: if classify_verbosity in self.source.verbose: self.source.notify("beginning function synopsis parse: " + `self.io`) self.output = "" while self.io.lines: info = self.__parse_function_synopsis_info() proto = self.__parse_function_prototype() if info or proto: self.output += info + proto else: break if self.output: self.output = "\n"+self.output+"\n" finally: if classify_verbosity in self.source.verbose: self.source.notify("ending function synopsis parse: " + self.output) except LiftException, e: self.error = "function synopsis parse failed on `%s' (%d): %s" % \ (self.io.token_peek(), self.io.token_index, e.message) if classify_verbosity in self.source.verbose: self.source.notify(self.error) # Since we can detect function synopses reliably, check here # and make self.output nonempty so we'll error out and not try # doing a command parse. if filter(self.is_sourcecode, self.io.lines): self.output = "" self.io.tokenize() def is_sourcecode(self, text): "Recognize that a line is source code." 
if blankline.search(text): return True for (pattern, dummy) in FunctionSynopsisParser.language_lines: if pattern.search(text): return True for (pattern, dummy) in FunctionSynopsisParser.language_fragments: if pattern.search(text): return True return False def __pretokenizer(self, line): line = detroff(line) line = line.replace(")", " ) ").replace("(", " ( ") line = line.replace(",", " , ").replace("*", " * ") line = line.replace("[", " [ ").replace("]", " ] ") line = line.replace(";", " ; ").replace("~", " ~ ") return line def __detokenize(self, line): return line.replace("[ ]", "[]").replace("* ", "*") \ .replace(" ; ", ";").replace(" ~ ", "~") def __parse_paramdef(self, arg): "We've been handed a formal argument; parse it into a ParamDef." if not arg: # Triggered by ,) which can be generated by mdoc return "" if len(arg) == 1: return " "+arg[0]+"\n" # If there is a function prototype in the declaration, strip it. # No, this won't handle nested prototypes. def rindex(x, lst): last = len(lst) - 1 for i in range(0, last+1): if lst[last - i] == x: return last - i return -1 last = len(arg) - 1 if arg[-1] == ')': last = rindex("(", arg) # Now look for the rightmost token that resembles a name. # There's your parameter. param_ind = -1 for i in range(last): if arg[last - i][0].isalpha(): param_ind = last - i break if param_ind == -1: prolog = " ".join(arg) var = "" epilog = "" else: prolog = " ".join(arg[:param_ind]) var = arg[param_ind] epilog = " ".join(arg[param_ind+1:]) prolog = self.__detokenize(prolog) epilog = self.__detokenize(epilog) self.source.localhints.post(var, "varname role='parameter'") return " " + prolog + " " + var + "" + epilog + "\n" def __parse_function_prototype(self): "Parse a C or C++ function prototype." if classify_verbosity in self.source.verbose: self.source.notify("beginning function prototype parse, language %s" % self.language) try: if classify_verbosity in self.source.verbose: self.source.notify("parse_function_prototype() sees: " + `self.io`) # Seek the name token. parendepth = 0 name = None prolog = [] hint_dict = {} seentype = False self.io.checkpoint() # Munch the part before the formals while True: tok = self.io.token_pop() if classify_verbosity in self.source.verbose: self.source.notify("looking at %s" % `tok`) tnext = self.io.token_peek() # The sequence \n( should be treated like (, so a function # prototype with a line break just after the name is detected. 
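#
# The parameter-name heuristic in __parse_paramdef() above, restated: any
# trailing nested prototype is excluded from the search, then the rightmost
# alphabetic token is taken as the parameter name, with everything before it
# treated as type prolog and everything after as epilog.  For example:
#
#     ["const", "char", "*", "path"]
#         # -> prolog "const char *", parameter "path", no epilog
#     ["void", "(", "*", "handler", ")", "(", "int", ")"]
#         # -> parameter "handler"; the trailing "( int )" is excluded from
#         #    the name search and ends up in the epilog
#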
if tnext == '\n': self.io.token_pop() second = self.io.token_peek() if classify_verbosity in self.source.verbose: self.source.notify("newline special case sees %s" % `second`) if second != '(': self.io.token_push('\n') else: tnext = second # We shouldn't run out of tokens here if tok is None: if classify_verbosity in self.source.verbose: self.source.notify("C prototype parse failed while looking for (") self.io.unroll() return "" # Cope with obnoxious Tcl sidebar marks as well as newlines elif tok in ("\n", "|"): continue # And with spurious breaks elif tok == "": continue # Accumulate C keywords if tok in c_declarators or tok.startswith('operator'): if classify_verbosity in self.source.verbose: self.source.notify("Treating %s as declarator" % tok) elif not id_re.match(tok) and not tok in ("(", ")", "*", "&", "~"): if classify_verbosity in self.source.verbose: self.source.notify("illegal token %s while looking for declaration specifiers" % tok) self.io.unroll() return "" # Assume that any identifier followed by a non-identifier ia # the function name, rather than some flukey typedef in the # declaration. This will do the right thing with # struct foo *bar(x, y) elif not name and id_re.match(tok): if tnext and not id_re.match(tnext) and tnext != '\n': name = tok if classify_verbosity in self.source.verbose: self.source.notify("name is %s, non-identifier is %s" % (name, `tnext`)) elif seentype: if classify_verbosity in self.source.verbose: self.source.notify("looks like text, not a function declaration: %s" % tok) self.io.unroll() return "" else: # Could be a typedef if classify_verbosity in self.source.verbose: self.source.notify("treating %s as a type" % tok) hint_dict[tok] = "type" seentype = True elif name and parendepth == 0 and tok == "(": break elif tok == '(': parendepth += 1 elif tok == ')': parendepth -= 1 elif tok in ("struct", "union", "enum"): hint_dict[tok + " " + tnext] = "type" prolog.append(tok) tok = self.io.token_pop() tnext = self.io.token_peek() prolog.append(tok) # Kluge to deal with C++ declarators if self.io.lookahead[:2] == [")", "("]: self.io.token_pop(2) prolog += " ()" if not name: if general_verbosity in self.source.verbose: self.source.notify("no name in apparent function declaration.") self.io.unroll() return "" if parse_verbosity in self.source.verbose: self.source.notify("Function name: " + name) prolog[prolog.index(name)] = "" + name + "" hint_dict[name] = "function" prolog = self.__detokenize(" ".join(prolog)) # Is this an old-style or a new-style declaration? firstformal = self.io.token_pop() argcount = parendepth = 0 formal_args = "" newstyle = False if firstformal == ')': # No formals at all. Treat as K&R style if parse_verbosity in self.source.verbose: self.source.notify("no formals") else: if self.io.token_peek() in (")", ","): # Just one token in the formal. This case is ambiguous; # could be a K&R-style declaration, or could be an ANSI # declaration like # virtual void setToggleAction ( bool ) # where the single formal is a typedef rather than a name. # This is why we track whether we've seen ANSI C constructions. # We also want to catch the case of # int foo(void) # here, that's what the c_declarators check is about. 
self.io.token_push(firstformal) newstyle = self.seen_ansi or firstformal in c_declarators or self.io.lines[0].strip().endswith(";") else: # More than one identifier in the formal self.io.token_push(firstformal) self.seen_ansi = newstyle = True if parse_verbosity in self.source.verbose: if newstyle: self.source.notify("ANSI-style declaration of %s"% name) else: self.source.notify("K&R-style declaration of %s" % name) # If it's an old-style declaration, count and skip the # formal names. Save them in case there are no argument # declarations at all. if newstyle: terminator = ',' else: terminator = ';' formalnames = [[]] if self.io.token_peek() == ")": # Excludes no-args case self.io.token_pop() else: while True: tok = self.io.token_pop() if not tok: # If we ran out of tokens without seeing a # balancing ), this isn't a C prototype at all. # Bail out. if general_verbosity in self.source.verbose: self.source.warning("no balancing )") self.io.unroll() return "" if tok == '(': parendepth += 1 if tok == ')': parendepth -= 1 if tok == ",": formalnames.append([]) argcount += 1 continue elif tok == ")": argcount += 1 if parendepth == -1: break formalnames[-1].append(tok) # We just ate the terminating paren on what looks like a # K&R-style declaration. Danger lurks here. # Are we looking at an old-style declaration with *nothing* # but formals? If so, head off any attempt to parse them, # it will only come to grief. no_declarations = False maybe_semi = self.io.token_pop() if maybe_semi == ";": no_declarations = True elif maybe_semi != "\n": if classify_verbosity in self.source.verbose: self.source.warning("suspicious token %s after )" \ % maybe_semi) self.io.token_push(maybe_semi) else: # A second newline here means there is whitespace where # we're expecting the parameter declarations. This # happens a lot on the Tcl pages. Give up. maybe_newline = self.io.token_peek() if maybe_newline in ("\n", "", None): no_declarations = True else: # We're probably looking at the first declarator. self.io.token_push(maybe_semi) # If there are no declarations, use the formal names we # stashed away. It's better than nothing. if no_declarations: if parse_verbosity in self.source.verbose: self.source.notify("no parameter declarations") for param in formalnames: formal_args += "%s\n" % " ".join(param) formal_args = self.__detokenize(formal_args) argcount = 0 # Go get the prototype formals. If this is a new-style # declaration, terminate on seeing a top-level ). If it's # old style, we've skipped past the formals and we want to # grab parameter definitions until we've counted the right # number of terminating semicolons. parendepth = 0 while newstyle or argcount: formal = [] while True: tok = self.io.token_pop() if parse_verbosity in self.source.verbose: self.source.notify("Token (%d): %s %s" % (parendepth, tok, self.io.lookahead)) if tok is None: if parse_verbosity in self.source.verbose: self.source.warning("unexpected end of token list") self.io.unroll() return "" elif tok in ("\n", ""): continue elif tok == "(": parendepth += 1 elif tok == ')': if newstyle and parendepth == 0: newstyle = 0 argcount = 1 # Terminate outer loop, break # end of formal and prototype else: parendepth -= 1 elif tok == terminator: if parendepth == 0: break # End of formal formal.append(tok) # Formal argument should be complete. 
Hand it off for analysis if parse_verbosity in self.source.verbose: self.source.notify("Formal: %s" % formal) formal_args += self.__parse_paramdef(formal) argcount -= 1 # We've gatherered all the argument markup if formal_args == "void": formal_args = " " if formal_args == "...": formal_args = " \n" if not formal_args: if newstyle: formal_args = "" else: formal_args = "" # Consume optional semicolons following the close paren if self.io.token_peek() in (";", ";"): self.io.token_pop() if parse_verbosity in self.source.verbose: self.source.notify("ate trailing semi") if self.io.token_peek() not in (None, "\n", ""): if parse_verbosity in self.source.verbose: self.source.warning("trailing junk '%s' after prototype" % self.io.token_peek()) self.io.unroll() return "" else: # If we're at end of line, consume the line so the next # go-around of the function synopsis parser won't see it. while self.io.token_peek() == "\n": self.io.token_pop() if parse_verbosity in self.source.verbose: self.source.notify("ate trailing newline") # Now we can assemble the actual prolog... prolog = "" + prolog + "\n" # Now assemble and return it. if prolog or formal_args: output="\n"+prolog+formal_args+"\n" # Since the parse succeeded, the semantic hints we gathered # are good for (hid, htype) in hint_dict.items(): self.source.localhints.post(hid, htype) finally: if classify_verbosity in self.source.verbose: self.source.notify("ending function prototype parse") return output def __detect_passthroughs(self, line=None): # Detect language-specific line pattern if line is None: line = self.io.peekline() for (pattern, lang) in FunctionSynopsisParser.language_lines: if pattern.search(line): return lang return None def __parse_function_synopsis_info(self): # Accept any number of lines as a FuncSynopsisInfo if classify_verbosity in self.source.verbose: self.source.notify("beginning function synopsis info parse") synopsisinfo = "" while True: skip_ignorables(self.source) line = self.io.peekline() if classify_verbosity in self.source.verbose: self.source.notify("candidate line: %s" % `line`) if line is None: break line = detroff(line) # Pass through blank lines if blankline.match(line): synopsisinfo += line self.io.popline() continue # Pass through breaks if line.startswith(""): self.io.popline() synopsisinfo += "\n" continue # Pass through C compiler invocation lines. Some libraries # insert these in command synopses. If we don't do this explicitly # here, it will look like a command synopsis and cause an error # at a later parse stage. if line.startswith("cc") or line.startswith("gcc"): synopsisinfo += line self.io.popline() continue # Also pass through anything that looks like a Qt section header if line.strip() in qt_headers: synopsisinfo += line self.io.popline() continue # Other things, like cpp directives, should pass through as well. # Test for single-line typedefs here so as not to have a bad # interaction with the token-pair code below. lang = self.__detect_passthroughs(line) if lang: if classify_verbosity in self.source.verbose: self.source.notify("from %s language identified as %s\n"% (`line`, lang)) self.language = lang synopsisinfo += line self.io.popline() continue # On the other hand, seeing ( means we have run into what should be # a function synopsis. Throw it back. if "(" in line: break # Pass through any line ending with semicolon. # This catches single-line C declarations that don't have an # obvious keyword up front. 
if line.endswith(";\n"): synopsisinfo += line self.io.popline() continue # Pass through any line ending with colon after a blank line. # This catches things like DEPRECATED: in the libpng pages. if not self.source.diversion[-1].strip() and line.endswith(":\n"): synopsisinfo += line self.io.popline() continue # Pass through line sequences bracketed by specified token pairs. # This is where we catch stuff like multiline struct declarations. for (start,end,lang,errmsg) in FunctionSynopsisParser.token_pairs: if start.match(line): if parse_verbosity in self.source.verbose: self.source.notify("Declaration starts with %s" % start) while self.io.lines: line = detroff(self.io.popline()) if parse_verbosity in self.source.verbose: self.source.notify(`line`) synopsisinfo += line # This is the magic that allows us to avoid elaborate # tokenization rules. Look for the terminator as the # suffix of a token. if end.search(line): break else: raise LiftException("missing end token for " + errmsg) else: # Nothing we recognize. Stop, and don't pop the current line break if classify_verbosity in self.source.verbose: self.source.notify("ending function synopsis info parse") if synopsisinfo: return "\n"+synopsisinfo+"\n" else: return "" class CommandSynopsisSequenceParser: "Parse a sequence of command synopses." opt_file_ext = re_compile(r"\[\.([a-zA-Z|.]+)\]") force_text = re_compile(r"\s[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+\s") def __init__(self, io, source, refnames): self.io = io self.source = source self.refnames = refnames self.output = "" self.confirmed = False self.error = None self.context = None self.callnest = "" self.groupnest = 0 self.lastnest = [] # Arrange for lexical analysis to work self.io.tokenize(self.__pretokenize) if bsd_verbosity in self.source.verbose: self.source.notify("before reexpansion:" + `self.io`) while True: nextl = self.io.peekline() if nextl is None: break elif nextl.startswith("") or blankline.search(nextl): self.io.popline() continue else: nextpart = [] for line in self.io.lines: if line.startswith(""): break nextpart.append(line) if not filter(self.is_command_synopsis_line, nextpart): break output = self.parse_command_synopsis() if not output: break self.output += output if self.error: break self.io.tokenize() # Restore normal tokenization def __pretokenize(self, ln): ln = detroff(ln) # Fix a perldoc problem ln = ln.replace(r"\*(--", "--") # Remove ordinary troff highlight macros ln = troff_highlight_stripper.sub("", ln) # Convert . . . to ... ln = re.sub(r"\.\s+\.\s+\.", r"...", ln) # Grotty little hack to make lexical analysis trivial. I got # this idea from something I read about the first FORTRAN compiler. ln = CommandSynopsisSequenceParser.opt_file_ext.sub(r".@LB@\1@RB@", ln) ln = ln.replace(r"|.", r"|.") ln = ln.replace("][", "] @GLUE@ [") ln = ln.replace("|", " | ").replace("...", " ... ") ln = ln.replace("[", " [ ").replace("]", " ] ") ln = ln.replace("{", " { ").replace("}", " } ") ln = ln.replace("@LB@", "[").replace("@RB@", "]") # Identify and split up redirections # Ooops...have to be smarter than this! #ln = ln.replace(" <", " < ").replace(">", " > ") return ln def is_command_synopsis_line(self, rawline): "Does this look like a command synopsis, not just a string of words?" line = detroff(rawline) # Pipe bar is a sure sign. So is equals, for GNU-style declarations. if '|' in line or '=' in line or "..." in line or "[-" in line: return 1 # Don't be fooled by {}[] that are actually part of C declarations. 
# Otherwise we can end up trying to parse as command synopses some # things that should be treated as plain text. cph(1) is an example. has_c_keywords = False for keyword in c_declarators: if re.search(r"\b" + keyword + r"\b", line): has_c_keywords = True break # Look for special characters that could be part of either # function or command synopsis. ambiguous = False for c in ("{", "[", "]", "}"): if c in line: ambiguous = True break if ambiguous and not has_c_keywords: return 2 # We don't want to be fooled by text lines or option lists that # begin with a dash but continue with running text. if CommandSynopsisSequenceParser.force_text.search(line): return 0 # If the line begins with one of the command's aliases, always treat # as a synopsis line. This catches the important special case where # the command name occurs alone on the line, followed by lines # describing options. Also catches cases like "pf2afm fontfilename". # Check the global hints database, too. tokens = line.split() if len(tokens): if len(tokens[0]) and tokens[0] in self.refnames: return 3 if globalhints.get(tokens[0]) == "command": return 4 # If we see -1: return 6 # In mdoc, synopsis sections aren't allowed to contain running text. if self.source.in_synopsis() and self.source.is_active("mdoc"): return 7 # Look for option starts in syntax sections only. if line[0] == '-' or line.find(" -") > -1: return 8 # Now it gets iffy. We don't have many tokens on this line, or the # forcetext regexp would have caught it. Look at the raw line. # If the first token is bolded, that probably means it's a command # name that doesn't happen to match anything in the name section. # Apply this test only when we're in a synopsis section. if self.source.in_synopsis() and rawline.startswith(r"\fB") or rawline.startswith(TroffInterpreter.ctrl + r"B "): return 9 # Nope, doesn't look like a command synopsis line if classify_verbosity in self.source.verbose: self.source.notify("'%s' does not look like a synopsis line" % line.rstrip()) return 0 def parse_command_synopsis(self): "Translate a synopsis line -- here is where the heavy work starts." if classify_verbosity in self.source.verbose: self.source.notify("parse_command_synopsis begins: refnames are %s" % self.refnames.keys()) output = "" try: self.callnest = "" self.groupnest = 0 command = self.io.token_pop() self.refnames[command] = True if parse_verbosity in self.source.verbose: self.source.notify("Command is %s" % command) if command in self.refnames or is_file_or_command_name(command): globalhints.post(command, "command") output += (" %s" % command) else: self.io.token_push(command) raise LiftException("first token %s in synopsis looks wrong." % command) self.io.checkpoint() while self.io.lines: if is_nltext_line(self.io.lines[0]): break arg = self.__compile_arg() if arg == None: break output += " " + `arg` + "\n" # This is where we short-stop the command-synopsis parser # from eating trailing text sections. 
if `arg` == "" and self.io.lines and \ not self.is_command_synopsis_line(self.io.lines[0]): break return "\n"+output+"\n" except LiftException, e: self.error = "command synopsis parse failed on `%s' (%d): %s" % \ (self.io.token_peek(), self.io.token_index, e.message) self.io.unroll() # Generate a useful error message: self.context = "\n" if self.lastnest: self.context += " ".join(self.io.lookbehind[:self.lastnest[-1]]) self.context += " $ " self.context += " ".join(self.io.lookbehind[self.lastnest[-1]:]) else: self.context += " ".join(self.io.lookbehind[:self.io.token_index]) self.context += " ^ " self.context += " ".join(self.io.lookbehind[self.io.token_index:]) return "\n" + make_comment("\n" + self.error + "\n" + self.context) + "\n" # Lexical tests def __is_next_special(self): if self.io.token_peek() in ("[", "]", "{", "}", "|", "...", "*"): self.confirmed = True return True else: return False def __is_next_command(self): return self.io.token_peek() in self.refnames or globalhints.get(self.io.token_peek()) == "command" def __is_next_option(self): tnext = self.io.token_peek() if tnext and tnext[0] in ('-', '+') or tnext.startswith("±"): self.confirmed = True return True elif tnext and self.lastnest and tnext in ('&', '\'): # See tex.1 return True else: return False def __is_next_numeric(self): try: int(self.io.token_peek()) return True except (ValueError, TypeError): return False def __is_next_replaceable(self): tnext = self.io.token_peek() if tnext is None: return False # Good reasons for accepting funky leader characters: # @, % -- dig.1 # :, ', " -- perlrun.1 and other manual pages # = -- as.1 # , -- chmod.1 # . -- date.1 # # -- gphoto.1 # ? -- cdecl.1 and other places where ? invokes help. # / -- dummy filename arguments # \ -- TeX commands such as luatex.1 # & -- TeX commands such as luatex.1 elif tnext[0] in string.letters + "./=:'\"@%,#?\\&" or (tnext[:4] == "<" and tnext != "<") or self.__is_next_numeric() or is_file_or_command_name(tnext): return True # nm.1 elif re.match("[0-9]+_[0-9]+", tnext): self.source.warning("suspicious replaceable %s in synopsis" % tnext) return True else: return False # Manual-synopsis grammar def __compile_arg(self): try: self.callnest += " " if parse_verbosity in self.source.verbose: self.source.notify(self.callnest + "compile_arg(" + `self.io.token_peek()` + ")") res = self.__compile_arg1() if res == None: res = None # Failure is signaled by throwing an exception else: while self.io.token_peek() == "\n": self.io.token_pop() if self.io.token_peek() in ("...", "*"): self.io.token_pop() res.repeat = 1 elif self.io.token_peek() == "|": self.io.token_pop() first = res res = ParseNode("group") res.children.append(first) self.callnest += " " if parse_verbosity in self.source.verbose: self.source.notify("%sentering alternation"%self.callnest) while True: if self.io.token_peek() in ("|", "\n"): self.io.token_pop() continue if self.io.token_peek() not in ("]", "}") and not self.__is_next_command(): element = self.__compile_arg1() if element: res.children.append(element) else: return res continue break if parse_verbosity in self.source.verbose: self.source.notify("%sexiting alternation"%self.callnest) self.callnest = self.callnest[:-2] elif self.io.token_peek() == "@GLUE@": res = ParseNode(self.io.token_pop()) if parse_verbosity in self.source.verbose: self.source.notify("%scompile_arg() returns %s: tokens are %s" % (self.callnest, `res`, self.io.lookahead)) finally: self.callnest = self.callnest[:-2] return res def __compile_arg1(self): try: self.callnest += 
" " if parse_verbosity in self.source.verbose: self.source.notify(self.callnest + "compile_arg1(%s, %s)" % (`self.io.token_peek()`, self.io.lookahead)) # Now get an argument if self.io.token_peek() is None: if self.groupnest == 0: res = None else: raise LiftException("unbalanced group in synopsis markup") elif self.io.token_peek() == "": self.io.token_pop() while self.io.token_peek() == '\n': self.io.token_pop() if not self.__is_next_command(): res = ParseNode("sbr") elif self.groupnest == 0: res = None else: raise LiftException("unterminated group in synopsis") elif self.io.token_peek() == "\n": self.io.token_pop() if self.groupnest == 0 and self.__is_next_command(): res = None else: res = ParseNode("\n") elif self.__is_next_option(): option = self.io.token_pop() oldstyle = self.io.token_peek() == "@GLUE@" if oldstyle: self.io.token_pop() res = ParseNode("arg") gnustyle = option.split("=") if len(gnustyle) > 1: optnode = ParseNode("option", gnustyle[0]) res.children.append(optnode) optnode.righthand = gnustyle[1] else: optnode = ParseNode("option", option) res.children.append(optnode) if self.io.lookahead and self.__is_next_replaceable(): res.children.append(ParseNode("replaceable",self.io.token_pop())) if oldstyle: optnode.glue = "" else: optnode.glue = " " self.source.localhints.post(re.escape(optnode.token), "option") elif self.__is_next_replaceable(): res = ParseNode("arg") res.children.append(ParseNode("replaceable", self.io.token_pop())) elif self.io.token_peek() and self.io.token_peek()[:4] in ("<", ">"): res = ParseNode("redirect", None, "plain") res.token = self.io.token_pop() elif self.io.token_peek() in ("[", "{"): self.callnest += " " if parse_verbosity in self.source.verbose: self.source.notify("%sentering group"%self.callnest) self.groupnest += 1 self.lastnest.append(self.io.token_index) self.io.token_pop() if self.io.token_peek() == "{": required = "req" else: required = "opt" lst = [] while True: if self.io.token_peek() == '\n': self.io.token_pop() continue if self.io.token_peek() not in (None, "]", "}"): lst.append(self.__compile_arg()) continue break if len(lst) == 1: res = lst[0] else: res = ParseNode("arg") res.children = lst res.choice = required if self.io.token_peek() is None or self.io.token_peek() == "": raise LiftException("expecting ] or }") else: self.io.token_pop() self.lastnest.pop() self.groupnest -= 1 if parse_verbosity in self.source.verbose: self.source.notify("%sexiting group"%self.callnest) self.callnest = self.callnest[:-2] else: raise LiftException("expecting argument") if parse_verbosity in self.source.verbose: self.source.notify("%scompile_arg1() returns %s: tokens are %s" % (self.callnest, res, " ".join(self.io.lookahead))) finally: self.callnest = self.callnest[:-2] return res def is_nltext_line(line): "Are there patterns here that must be natural language?" if line is None: return False line = line.strip() if not line or len(line) < 2: return False # Line ending with period that is not part of an ellipsis has to be a # NL sentence, because lone periods can't occur in command # synopses and periods can't occur at all in function synopses. if line[-1] == '.' and line[-2].isalpha(): return True # Line ending with semicolon has to be a NL sentence. Note that # embedded colons can occur as argument leaders in, e.g., # port suffixes for some network commands. if line[-1] == ':': return True words = line.split() if len(line) < 8: return False if len(words) < 3: return False # Look for giveaway words. 
for word in ("the", "and", "with", "whitespace", "abbreviated"): if word in words: return True return False class DisplayParser: "Parse a block into function synopsis, command synopsis or display text." old_style_option_glue = re_compile(r"([^A-Za-z]-[A-Za-z]*)(?:\f.)([A-Za-z])") unparseable = re_compile(r"\$|=>|[^:]//") # Perl and other nightmares def __init__(self, source, try_synopsis, literal, refnames=None): "Arrange the interpreter to accumulate synopsis lines in this object." self.source = source self.try_synopsis = try_synopsis self.literal = literal self.refnames = refnames if self.refnames is None: self.refnames = {} self.synopses = [] source.diversion = self.synopses self.io = None source.ignore("nf") source.ignore("fi") source.ignore("ft") source.ignore("ti") # .ta conveys no information in a Synopsis section, # but outside one it may be our only clue that the man page # author kluged up a table inline. So don't disable # processing it in that case. if source.in_synopsis(): source.ignore("ta") source.ignore("ce") source.unignore("br") source.unignore("nl") source.unignore("in") def __wrap(self): # Re-enable normal commands self.source.diversion = self.source.output self.source.unignore("nf") self.source.unignore("fi") self.source.unignore("ft") self.source.unignore("ti") if self.source.in_synopsis(): self.source.unignore("ta") self.source.unignore("ce") self.source.ignore("br") self.source.ignore("nl") self.source.ignore("in") def __detect_unparseable_synopsis(self): "Detect stuff we just shouldn't try to parse." # Blank sections text = self.io.text().strip() if not text: return True # Or anything with Perl identifiers in it... if DisplayParser.unparseable.search(text): return True # Or Fortran synopses (as in the pvm bindings) if "Fortran" in text: return True # Also detect things that look like SQL synopses if text.split()[0].isupper() and self.source.find("SQL", backwards=True): return True return False def __emit_text(self, lines): if not lines: return "" if io_verbosity in self.source.verbose: self.source.notify("__emit_text('''%s''')\n" % "".join(lines)) for i in range(len(lines)): if lines[i].startswith("") and text.endswith("\n"): text = text[:-1] + "\n" return text def transform(self): "Parse and transform the display section we've gathered." if classify_verbosity in self.source.verbose: self.source.notify("display parse begins, refnames = %s"%self.refnames) # Undo redirection and re-enable normal commands. self.__wrap() # First, fold the lines. We have to handle continuations # explicitly, since we may be outside the body section. 
processed = [] for line in self.synopses: if line[:4] != "" % header) self.liststack.append("") elif "-bullet" in tokens[1:]: self.source.emit("" % header) self.liststack.append("") elif "-dash" in tokens[1:] or "-hyphen" in tokens[1:]: # See the comment near ip_tag_mapping self.source.emit("" % repr(header)) self.liststack.append("") elif "-item" in tokens[1:]: self.source.emit(""% repr(header)) self.liststack.append("") elif "-enum" in tokens[1:]: self.source.emit("" % repr(header)) self.liststack.append("") elif "-tag" in tokens[1:]: self.source.emit(""% repr(header)) self.liststack.append("") elif "-diag" in tokens[1:]: self.source.emit(""% repr(header)) self.liststack.append("") self.suppress_callables = True elif "-hang" in tokens[1:]: self.source.emit(""% repr(header)) self.liststack.append("") elif "-ohang" in tokens[1:]: self.source.emit(""% repr(header)) self.liststack.append("") elif "-inset" in tokens[1:]: self.source.emit(""% repr(header)) self.liststack.append("") elif command == "It": if self.liststack[-1] == "": # Columns into tables segments = [[]] for fld in args: if fld == "Ta": segments.append([]) else: segments[-1].append(fld) for (i, seg) in enumerate(segments): if seg[0] in MdocInterpreter.callable: segments[i] = self.macroeval(seg) else: segments[i] = " ".join(segments[i]) self.rowcount += 1 if self.rowcount == 1: self.source.emit("" % len(args)) self.source.emit(" ") for seg in segments: self.source.emit(" %s" % fontclose(seg)) self.source.emit(" ") else: # Otherwise we may have to close a previous entry if args: tagline = self.macroeval(["No"] + args) else: tagline = "" if self.itemcount[-1]: self.source.end_paragraph(label="It") self.source.emit("") if self.liststack[-1] == "": self.source.emit("") self.itemcount[-1] += 1 termlines = [tagline] while True: nextl = self.source.popline() if match_command(nextl, "It"): digested = lineparse(nextl) digested = self.macroeval(["No"] + digested[1:]) termlines.append(digested) else: self.source.pushline(nextl) break # We certainly have to open a new entry. if self.liststack[-1] == "": self.source.emit("") self.source.emit("%s" % fontclose("\n\n".join(termlines))) self.source.emit("") elif self.liststack[-1] == "": body = "\n".join(termlines) if body: self.source.emit("%s" % body) else: self.source.emit("") elif self.liststack[-1] == "": self.source.emit("") self.source.need_paragraph() elif command == "El": if self.liststack[-1] == "": self.source.emit(" ") else: self.source.end_paragraph(label="El") if self.liststack[-1] == "": self.source.emit("") self.source.emit("") elif self.liststack[-1] == "": self.source.emit("") elif self.liststack[-1] == "": if not self.source.endswith(""): self.source.emit("") self.source.emit(self.liststack.pop()) self.itemcount.pop() self.source.need_paragraph() elif command == "Rs": self.biblio.append({}) self.biblio[-1]["id"] = `len(self.biblio)` self.inref = True elif command == "Re": self.inref = False if self.source.output[-1] == "": self.source.output = self.source.output[:-1] else: self.source.end_paragraph(label="Re") self.source.emit("") # We'd like to emit a here, but the DocBook DTD # doesn't permit it. 
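#
# Illustration of the ".It ... Ta ..." column handling above: a -column list
# row is cut into table-entry segments at each "Ta" token, and each segment is
# then either re-run through macroeval() or joined as plain text.
#
#     args = ["Li", "foo", "Ta", "the", "foo", "value", "Ta", "default"]
#     segments = [[]]
#     for fld in args:
#         if fld == "Ta":
#             segments.append([])
#         else:
#             segments[-1].append(fld)
#     # segments -> [['Li', 'foo'], ['the', 'foo', 'value'], ['default']]
#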
if self.source.docbook5: self.source.emit("" % self.source.make_id_from_title("ref" + `len(self.biblio)`)) else: self.source.emit("" % self.source.make_id_from_title("ref" + `len(self.biblio)`)) self.source.emit("[%s]" % len(self.biblio)) self.source.emit("") for (fld, tag) in ( \ ("A", None), \ ("Q", None), \ ("B", "citetitle"), \ ("V", None), \ ("J", None), \ ("N", None), \ ("P", None), \ ("R", None), \ ("T", "citetitle"), \ ("D", None), \ ("I", None), \ ("C", None), \ ("O", None), \ ): if self.biblio[-1].has_key(fld): line = "" if tag: line += "<%s>" % tag line += ", ".join(self.biblio[-1][fld]) if tag: line += "" % tag line += ";" self.source.emit(line) self.source.emit("") self.source.emit("\n") self.source.emit("\n") # Not documented, but present in the macro files elif command == "Ud": self.source.pushline("currently under development") else: return False return True # Machinery for evaluating parsed macros begins here def evalmacro(self, args): "Pop args off the stack and evaluate any associated macro." if bsd_verbosity in self.source.verbose: self.source.notify("evalmacro(%s)" % ", ".join(map(repr, args))) cmd = args.pop(0) if cmd in self.ignore_set: # In case we get keeps with .Oo/Oc while True: end = args.pop(0) if end == '\n': break return "" elif cmd == "Ad": # We don't care. We're translating it... #self.source.warning("the Ad macro is deprecated.") return self.encloseargs(args,"","") elif cmd == "Ai": return ["ANSI"] elif cmd == "An": if self.hasargs("An", args): return self.encloseargs(args, "phrase", "role='author'") elif cmd == "Ap": return self.replacemacro(args, "'@GLUE@") elif cmd == "Aq": return self.encloseargs(args, "<@GLUE@", "@GLUE@>") elif cmd == "Ac": return self.replacemacro(args, "@GLUE@>") elif cmd == "Ao": return self.replacemacro(args, "<@GLUE@") elif cmd == "Ar": if not args: return ["file..."] else: return self.styleargs(args, "replaceable") elif cmd == "At": return ["AT&T Unix"] elif cmd == "Bc": return self.replacemacro(args, "@GLUE@]") elif cmd == "Bo": return self.replacemacro(args, "[@GLUE@") elif cmd == "Bq": return self.encloseargs(args, "[@GLUE@", "@GLUE@]") elif cmd == "Brq": return self.encloseargs(args, "{@GLUE@", "@GLUE@}") elif cmd == "Bx": def bxhelper(args): if not args: return ["BSD UNIX"] else: return ["-".join(["%sBSD" % args[0]] + args[1:])] return self.process_punct(args, bxhelper, True) elif cmd == "Cm": if self.hasargs("Cm", args): return self.styleargs(args, "command") elif cmd == "Dc": return self.replacemacro(args, "@GLUE@”") elif cmd == "Do": return self.replacemacro(args, "“@GLUE@") elif cmd == "Dq": return self.encloseargs(args, "“@GLUE@", "@GLUE@”") elif cmd == "Dv": if self.hasargs("Dv", args): return self.styleargs(args, "constant") elif cmd == "Em": if self.hasargs("Em", args): return self.styleargs(args, "emphasis", "remap='Em'") elif cmd == "Eq": return self.encloseargs(args[2:], args[0]+"@GLUE@", "@GLUE@&"+args[1]) elif cmd == "Er": if self.hasargs("Er", args): return self.styleargs(args, "errorcode") elif cmd == "Ev": if self.hasargs("Ev", args): return self.styleargs(args, "envar") elif cmd == "Fa": if self.source.in_synopsis(): return map(lambda x: x+",", self.process_punct(args)) else: return self.styleargs(args, "emphasis", "remap='Fa'") elif cmd == "Fl": if not args: return ["-"] else: dashes = '-' while args and args[0] == 'Fl': dashes += '-' args.pop(0) args[0] = dashes + args[0] return self.styleargs(args, "option", "", "") elif cmd == "Ic": if self.hasargs("Ic", args): return self.styleargs(args, "command", 
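#
# A few small examples of the evalmacro() dispatch above for common mdoc
# inline macros (results described loosely; the exact markup comes from
# styleargs()):
#
#     .Ar              ->  the placeholder "file..."
#     .Fl o            ->  "-o" marked up as an option
#     .Fl Fl verbose   ->  "--verbose" marked up as an option; each extra Fl
#                          contributes another leading dash
#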
"remap='Ic'") elif cmd == "Lb": return self.process_punct(args, self.lbhook, True) elif cmd == "Li": return self.styleargs(args, "literal") elif cmd == "Ms": return self.styleargs(args,"literal") elif cmd == "Mt": return self.encloseargs(args,"","") elif cmd == "Nd": savesect = [" ".join(self.encloseargs(args, "", ""))] while True: line = self.source.popline() if match_command(line, "Sh"): self.source.pushline(line) break else: savesect.append(line) lines = [] self.source.interpret_block(savesect, lines) self.desc = " ".join(lines) if not self.source.body_section(): return [] else: return self.desc elif cmd == "Nm": name = " ".join(self.encloseargs(args, "", "")) if not self.name: self.name = name self.refnames[name] = True if self.source.sectname and name_synonyms.match(self.source.sectname): return [] else: if not name: name = self.name return ["%s" % self.name] elif cmd == "No": return self.replacemacro(args, "") elif cmd == "Ns": return self.replacemacro(args, "@GLUE@") elif cmd == "Oc": return self.replacemacro(args, "@GLUE@]") elif cmd == "Oo": return self.replacemacro(args, "[@GLUE@") elif cmd == "Op": return self.styleargs(args, ("[@GLUE@", "@GLUE@]")) elif cmd == "Pa": if self.source.in_synopsis(): return self.styleargs(args, "replaceable") else: return self.styleargs(args, "filename") elif cmd == "Pc": return self.replacemacro(args, "@GLUE@)") elif cmd == "Pf": # We don't want punctuation processing here operands = [] while args: if args[0] in MdocInterpreter.callable: break this = args.pop(0) operands.append(this) if this == '\n': break if len(operands) > 1: return [operands[0],"@GLUE@"] + operands[1:] else: return [operands[0],"@GLUE@"] elif cmd == "Po": return self.replacemacro(args, "(@GLUE@") elif cmd == "Pq": return self.encloseargs(args, "(@GLUE@", "@GLUE@)") elif cmd == "Px": return ["POSIX"] elif cmd == "Ql": return self.encloseargs(args, "'", "'") elif cmd == "Qc": return self.replacemacro(args, "@GLUE@\"") elif cmd == "Qo": return self.replacemacro(args, "\"@GLUE@") elif cmd == "Qq": return self.encloseargs(args, '"@GLUE@', '@GLUE@"') elif cmd == "Sc": return self.replacemacro(args, "@GLUE@\'") elif cmd == "So": return self.replacemacro(args, "\'@GLUE@") elif cmd == "Sq": return self.encloseargs(args, "`@GLUE@", "@GLUE@\'") elif cmd == "St": return self.process_punct(args, self.sthook, True) elif cmd == "Sx": #title = " ".join(args) return self.process_punct(args, lambda x: ["%s" % (self.source.id_from_title(" ".join(x)), " ".join(x))], False) elif cmd == "Sy": return self.styleargs(args, "emphasis", 'remap="Sy"') elif cmd == "Ta": return self.replacemacro(args, "\t") elif cmd == "Tn": # We used to set this with an acronym tag, following an older # version of the mdoc manual, but that won't work - among # other things, groff_mdoc(7) uses it at presentation level # to set contents items in small caps. return self.styleargs(args, "phrase", "remap='Tn'") elif cmd == "Ux": return ["Unix"] elif cmd == "Va": return self.styleargs(args, "varname") elif cmd == "Vt": return self.styleargs(args, "type") elif cmd == "Xc": return self.replacemacro(args, "") elif cmd == "Xo": return self.replacemacro(args, "") elif cmd == "Xr": return self.process_punct(args, self.xrhook, False) elif cmd[0] == "%": lst = self.process_punct(args, lambda x: self.bibliohook(cmd[1], x), True) if self.inref: return [] else: return lst # Sm is not officially parseable, but we have to treat it that way # in order for it to work inside Oo/Oc pairs (as in slogin.1). 
elif cmd == "Sm": enable = self.extractargs(args) if "on" in enable: self.spacemode = True elif "off" in enable: self.spacemode = False else: self.source.error("unknown argument to Sm") return [] else: self.source.error("unknown parseable macro " + `cmd`) return [] def bibliohook(self, field, lst): ref = " ".join(lst) if self.inref: # If we're within the scope of an Rs/Re, accumulate entry. if not self.biblio[-1].has_key(field): self.biblio[-1][field] = [] self.biblio[-1][field].append(ref) # Unresolved titles can simply turn into a title citation if field == "T": return ["%s" % (ref)] # Otherwise return the reference. else: for entry in self.biblio: if field in entry and ref in entry[field]: return ["[%s]" % (entry["id"], entry["id"])] else: raise LiftException("unresolved reference to '%s'" % ref) def sthook(self, args): if args[0] in MdocInterpreter.st_dict: return["" + MdocInterpreter.st_dict[args[0]] + ""] else: raise LiftException("unknown St macro '%s'" % args[0]) def lbhook(self, args): if args[0] in MdocInterpreter.lb_dict: return["" + MdocInterpreter.lb_dict[args[0]] + ""] else: raise LiftException("unknown Lb macro '%s'" % args[0]) def xrhook(self, args): if len(args) < 2: return ["%s" % args[0]] else: return ["%s%s" % (args[0], args[1])] def extractargs(self, args, stop_on_callable=0): operands = [] while args: if stop_on_callable and args[0] in MdocInterpreter.callable: break this = args.pop(0) operands.append(this) if this == '\n': break return operands def process_punct(self, args, hook=None, stop_on_callable=False): "Wrap required processing of punctuation around an evaluation." prepunct = [] postpunct = [] # Save leading punctuation while args and args[0] in MdocInterpreter.openers: prepunct.append(args.pop(0)) while args and args[-1] in MdocInterpreter.closers: postpunct = [args.pop()] + postpunct operands = [] while args: if stop_on_callable and args[0] in MdocInterpreter.callable: break this = args.pop(0) operands.append(this) if this == '\n': break if hook: operands = prepunct + hook(operands) + postpunct else: operands = prepunct + operands + postpunct result = [] for arg in operands: if arg in MdocInterpreter.closers: result.append("@GLUE@" + arg) elif arg in MdocInterpreter.openers: result.append(arg + "@GLUE@") else: result.append(arg) return result def encloseargs(self, args, opener, closer): "Grab and process arguments for an enclosure macro." return self.process_punct(args, lambda x: [opener] + x + [closer], False) def stylehook(self, args, tag, attr, dummy_prefix): "Wrap non-punctuation characters in given tag pair." result = [] if attr: attr = " " + attr if len(tag) == 2: start = tag[0] + attr end = tag[1] else: start = "<" + tag + attr + ">" end = "" for arg in args: if arg == "|" or arg in self.openers or arg in self.closers: result.append(arg) else: result.append(start + arg + end) return result def styleargs(self, args, tag, attribute="", prefix=""): return self.process_punct(args, lambda x: self.stylehook(x, tag, attribute, prefix), 1) def replacemacro(self, args, withmac): return self.process_punct(args, lambda x: [withmac] + x, 1) def macroeval(self, args): "Evaluate a macro, returning a list." 
if bsd_verbosity in self.source.verbose: self.source.notify("macroeval%s\n" % (tuple(args),)) if args[0][0] == '.': args[0] = args[0][1:] # Consume arguments and macro calls until none are left result = [] while args: nextpart = filter(lambda x: x, self.evalmacro(args)) if not self.spacemode and len(nextpart) > 1: for ind in range(len(nextpart)): nextpart.insert(2*ind+1, "@GLUE@") if bsd_verbosity in self.source.verbose: self.source.notify("evalmacro -> %s" % nextpart) result += nextpart # Glue the results together result = " ".join(result) result = hotglue.sub("", result) result = cleantag.sub("", result) if bsd_verbosity in self.source.verbose: self.source.notify("macroeval -> %s\n" % repr(result)) return result def preprocess(self, text): return text def postprocess(self, text): # It's not an error for Sx references to point elsewhere link_re = re_compile("([A-Za-z_]*)") while True: m = link_re.search(text) if m: linkstart = m.start(0) linkend = m.end(0) mid = m.group(1) label = m.group(2) if self.source.id_exists(mid): text = text[:linkstart+6] + text[linkstart+15:] else: self.source.warning("unresolved Sx label %s" % label) text = text[:linkstart] + \ "%s" % label + \ text[linkend:] else: break # Ugh...this can be produced by ,It .Xo/Xc; there's an example on # groff_mdoc(7). text = text.replace("\n", "") # Sanity check if not self.source.section_count: raise LiftException("no mdoc section structure, can't be lifted.") return text class MsInterpreter: "Interpret ms(7) macros." name = "ms" exclusive = True toptag = "article" immutable_set = set([]) ignore_set = set([ # Ignore presentation-level-only requests from Bell Labs. "RP", "ND", "DA", "1C", "2C", "MC", "BX", "KS", "KE", "KF", # Also ignore the Berkeley thesis-mode extension "TM", "CT", "XS", "XE", "XA", "PX", "AM", "EH", "OH", "EF", "OF", # These are not documented in the ms reference, but # they occur in ms papers, probably as relics from mm. "MH", "CS", "D3" ]) complain_set = set(["RS", "RE",]) parabreak_set = set(["blank","PP", "LP", "XP", "IP",]) sectionbreak_set = set(["NH", "SH", "SC",]) listbreak_set = set(["PP", "LP", "XP", "NH", "SH", "SC",]) translations = { "\\*" : [ # The Bell Labs prefix diacriticals (r"\*'", "´"), (r"\*`", "`"), (r"\*:", "¨"), (r"\*^", "ˆ"), (r"\*~", "˜"), (r"\*C", "ˇ"), (r"\*,", "¸"), # Berkeley extensions #(r"\**(_", "—"), # Input text was "\e\(**\^\u_\d" in original (r"\**(Q", "“"), (r"\**(U", "”"), # Berkeley postscript diacriticals (r"\**('", "`"), (r"\**(`", "´"), (r"\**(^", "ˆ"), (r"\**(,", "¸"), (r"\**(?", "¿"), (r"\**(!", "¡"), (r"\**(v", "ˇ"), (r"\**(_", "¯"), (r"\**.", "&udot;"), # Internal pseudo-entity (r"\**/", "ø"), (r"\**o", "Å"), (r"\**(3t", "&yogh;"), # Internal pseudo-entity (r"\**(Th", "Þ"), (r"\**(th", "þ"), (r"\**(D-", "Ð"), (r"\**(d-", "ð"), (r"\**q", "&ohook;"), # Internal pseudo-entity (r"\**(ae", "æ"), (r"\**(Ae", "Æ"), (r"\**(oe", "œ"), (r"\**(Oe", "&Oelig;"), ] } def __init__(self, source, verbose=0): self.source = source self.verbose = verbose self.font = "R" self.pointsize = 0 self.fmt = "R" self.author = Author() self.TL = None self.AU = None self.AI = [] self.AB = None self.flushed = False def interpret(self, dummy, tokens, caller): command = tokens[0][1:] args = tokens[1:] if command in ("B", "I", "R", "UL", "SM", "LG", "NL"): # Get our remap attribute in sync with other macro sets. 
if command == "UL": command="U" # Could be a change along either axis newpointsize = self.pointsize newfont = self.font if command == "NL": newpointsize = 0 elif command == "LG": newpointsize += 1 elif command == "SM": newpointsize += -1 else: newfont = command # If no actual change (as with two successive .NLs), we're done. if self.font == newfont and self.pointsize == newpointsize: return True if newpointsize == 0: fmt = newfont else: fmt = newfont + `newpointsize` if self.fmt == "R": if not args: self.source.emit(r"\f%s" % fmt) else: self.source.emit(r"\f%s%s\fP" % (fmt, args[0])) elif fmt == "R": if not args: self.source.emit(r"\fP") else: self.source.emit(r"\fP%s\f%s" % (args[0], self.fmt)) if not args: self.font = newfont self.pointsize = newpointsize self.fmt = fmt return True elif command == "B1": self.source.emit(r"") elif command == "B2": self.source.emit(r"") # Commands for front matter elif command == "TL": self.source.declare_body_start() self.TL = gather_lines(self.source) return True elif command == "OK": # Undocumented -- probably some Bell Labs thing gather_lines(self.source) return True elif command == "AU": self.AU = gather_lines(self.source) return True elif command == "AI": self.AI = gather_lines(self.source) return True elif command == "AB": self.AB = [] while self.source.lines: line = self.source.popline() tokens = lineparse(line) if tokens and tokens[0][1:3] == "AE": break if not (is_command(line) and self.source.ignorable(line)): self.AB.append(line) return True # Here's where we analyze the front matter and generate the header if not self.flushed: self.source.in_preamble = False if io_verbosity in self.source.verbose: self.source.notify("exiting preamble") self.flushed = True # If there's only one line of authors, try to break it up by # looking for " and ". There are a couple of historical examples # of this, notably in the EQN docs. if self.AU: if len(self.AU) == 1: trial = self.AU[0].split(" and ") if trial > 1: self.AU = trial else: # We'll also try splitting on commas trial = self.AU[0].split(", ") if trial > 1: self.AU = trial # Now we have one author per line. Try to analyze each name. digested = [] for name in self.AU: author = Author(name) if self.AI: author.orgname = " ".join(self.AI) digested.append(author) # OK, we've got enough info to generate the header if self.TL or self.AU or self.AI or self.AB: self.source.end_paragraph(label="ms header") self.source.emit("") if self.TL: self.source.emit("") caller.interpret_block(self.TL) self.source.emit("") for self.author in digested: if self.author.nonempty(): self.source.emit(`author`) if self.AB: self.source.emit("") self.source.need_paragraph() caller.interpret_block(self.AB) self.source.end_paragraph(label="AB") self.source.emit("") self.source.emit("") if command in ("blank","PP","LP","XP") or command == "IP" and len(tokens) == 1: self.source.paragraph() elif command in ("NH", "SH"): title = self.source.popline() try: newdepth = int(tokens[1]) except ValueError: newdepth = 1 self.source.push_section(newdepth, title) elif command == "IP": # If no tag is specified, treat as ordinary paragraph. self.source.end_paragraph(label="IP") # Some tags can turn into an itemized list. 
if tokens[1] in ip_tag_mapping: self.source.pushline(quoteargs(tokens)) gather_itemizedlist(TroffInterpreter.ctrl + "IP", self.source, ip_tag_mapping[tokens[1]]) # Otherwise, emit a variable list else: self.source.emit_variablelist(command, tokens[1]) elif command == "QP": self.source.begin_block("blockquote", remap="QP") while self.source.lines: line = self.source.popline() if is_command(line): self.source.pushline(line) break self.source.emit(line) self.source.end_block("blockquote", remap="QE") elif command == "DS": self.source.begin_block("literallayout", remap='DS') elif command == "DE": self.source.end_block("literallayout", remap='DE') elif command == "FS": self.source.begin_block("footnote", remap='FS') elif command == "FE": self.source.end_block("footnote", remap='FE') elif command == "QS": self.source.begin_block("blockquote", remap='QS') elif command == "QE": self.source.end_block("blockquote", remap='QE') # Undocumented Bell Labs-isms begin here elif command == "UX": self.source.pushline("Unix") return True elif command == "UC": self.source.pushline("%s" % args[0]) return True elif command == "SC": self.source.push_section(1, args[0]) elif command == "P1" and self.source.find("P2"): self.source.begin_block("programlisting", remap='P1') elif command == "P2": self.source.end_block("programlisting", remap='P2') else: return False return True def preprocess(self, text): return text def postprocess(self, text): return text class MeInterpreter: "Interpret me macros." name = "me" exclusive = True toptag = "article" immutable_set = set([]) ignore_set = set(["1c","2c","bc","bl","ef","eh","ep","fo", "he","hx","m1","m2","m3","m4","n1","n2", "of","oh","tp","xl","xp","sk","(z",")z", "sz","(l",")l", ]) complain_set = set(["ba","bx","ix","(b",")b","(c",")c","pa", "sx","uh",".$p",".$c",".$f",".$h",".$s", "+c","(x",")x", ]) parabreak_set = set(["blank","lp","pp","ip","np",]) sectionbreak_set = set(["sh",]) listbreak_set = set(["lp","pp","np","sh",]) translations = { "\\*" : [ (r"\*-", "–"), # Not quite right, supposed to be 3/4 dash (r"\*:", "¨"), (r"\*<", ""), (r"\*>", ""), (r"\*{", ""), (r"\*}", ""), (r"\*('", "´"), (r"\*(`", "`"), (r"\*^", "ˆ"), (r"\*,", "¸"), (r"\*~", "˜"), (r"\*(qe", "∃"), (r"\*(qa", "∀"), ], "\\(" : [ (r"\('", "´"), (r"\(`", "`"), (r"\(lq", "“"), (r"\(rq", "”"), ] } # List how .IP tags map into DocBook mark types def __init__(self, source, verbose=0): self.source = source self.verbose = verbose self.delay = [] self.in_abstract = False self.source.in_preamble = False if io_verbosity in self.source.verbose: self.source.notify("exiting preamble") def interpret(self, dummy, tokens, dummy_caller): cmd = tokens[0][1:] args = tokens[1:] if cmd in ("b", "bi", "i", "r", "rb", "sm", "u"): if len(args) <= 2: trailer = "" else: trailer = args[1] self.source.pushline(self.source.direct_highlight(cmd.upper(), [args[0]], trailer)) elif cmd == "q": if len(args) <= 2: trailer = "" else: trailer = args[1] self.source.pushline("%s%s" % (args[0], trailer)) elif cmd in ("blank", "lp", "pp"): self.source.declare_body_start() self.source.paragraph() elif cmd == "ip": self.source.emit_variablelist("ip", args[1]) elif cmd == "bp": self.source.pushline(quoteargs(tokens)) gather_itemizedlist(TroffInterpreter.ctrl + "bp", self.source, "bullet") elif cmd == "np": self.source.pushline(quoteargs(tokens)) gather_orderedlist(TroffInterpreter.ctrl + "np", self.source, "bullet") elif cmd == "(q": self.source.begin_block("blockquote", remap='(q') elif cmd == ")q": self.source.end_block("blockquote", 
remap=')q') elif cmd == "(f": self.source.begin_block("footnote", remap='(q') elif cmd == ")f": self.source.end_block("footnote", remap=')q') elif cmd == "(d": self.source.diversion = self.delay elif cmd == ")d": self.source.diversion = self.source.output elif cmd == "pd": self.source.output += self.delay self.delay = [] elif cmd == "sh": self.source.push_section(int(tokens[1]), tokens[2]) elif cmd == "++": if tokens[1] == "AB": self.in_abstract = True self.source.emit("") elif self.in_abstract: self.in_abstract = False self.source.emit("") else: return False return True def preprocess(self, text): return text def postprocess(self, text): return text class MmInterpreter: "Interpret mm(7) macros." name = "mm" exclusive = True toptag = "article" immutable_set = set(["B", "I", "R", "BI", "BR", "IB", "IR", "RB", "RI", "AE", "AF", "AL", "RL", "APP", "APPSK", "AS", "AT", "AU", "B1", "B2", "BE", "BL", "ML", "BS", "BVL", "VL", "DE", "DL", "DS", "FE", "FS", "H", "HU", "IA", "IE", "IND", "LB", "LC", "LE", "LI", "P", "RF", "SM", "TL", "VERBOFF", "VERBON", "WA", "WE", ]) ignore_set = set([")E", "1C", "2C", "AST", "AV", "AVL", "COVER", "COVEND", "EF", "EH", "EDP", "EPIC", "FC", "FD", "HC", "HM", "GETR", "GETST", "INITI", "INITR", "INDP", "ISODATE", "MT", "NS", "ND", "OF", "OH", "OP", "PGFORM", "PGNH", "PE", "PF", "PH", "RP", "S", "SA", "SP", "SG", "SK", "TAB", "TB", "TC", "VM", "WC"]) complain_set = set(["EC", "EX", "FG", "GETHN", "GETPN", "GETR", "GETST", "LT", "LD", "LO", "MOVE", "MULB", "MULN", "MULE", "NCOL", "nP", "PIC", "RD", "RS", "RE", "SETR", ]) parabreak_set = set([]) sectionbreak_set = set([]) listbreak_set = set([]) translations = { "\\*" : [ (r"\*F", ""), # Assumes that footnote marks are adjacent to footnotes ] } reductions = {} # Specific to this interpreter markdict = {"1":"arabic", "A":"upperalpha", "a":"loweralpha", "I":"upperroman", "i":"lowerroman"} def __init__(self, source, verbose=0): self.source = source self.verbose = verbose self.liststack = [] self.listcount = [] self.flushed = False self.author = Author() self.TL = self.AS = None def end_list(self): if self.listcount[-1]: self.source.end_paragraph(label="end_list") self.source.emit("") if self.liststack[-1] == "": self.source.emit("") self.source.emit(self.liststack.pop()) self.listcount.pop() def fold_highlights(self, cmd, args): # We need this to be a separate entry point for TP tag processing. 
if cmd in ("B", "I", "R"): return self.source.alternating_highlight(cmd + "P", args) elif cmd in ("BI", "BR", "IB", "IR", "RB", "RI"): return self.source.alternating_highlight(cmd, args) else: return None def interpret(self, dummy, tokens, caller): cmd = tokens[0][1:] args = tokens[1:] # Highlighting highlighted = self.fold_highlights(cmd, args) if highlighted: self.source.emit(highlighted) return True # Commands for front matter elif cmd == "TL": self.source.declare_body_start() self.TL = gather_lines(self.source) return True elif cmd == "AF": self.author.orgname = args[0] return True elif cmd == "AU": self.author.name(args[0]) self.author.orgdiv = " ".join(args[1:]) return True elif cmd == "AT": self.author.jobtitle = args[0] return True elif cmd == "AS": self.AS = [] while self.source.lines: line = self.source.popline() tokens = lineparse(line) if tokens and tokens[0][1:3] == "AE": break if not (is_command(line) and self.source.ignorable(line)): self.AS.append(line) return True # Here's where we analyze the front matter and generate the header if not self.flushed: self.source.in_preamble = False if io_verbosity in self.source.verbose: self.source.notify("exiting preamble") self.flushed = True # OK, we've got enough info to generate the header if self.TL or self.AS or self.author.nonempty(): self.source.end_paragraph(label="mm header") self.source.emit("") if self.TL: self.source.emit("") caller.interpret_block(self.TL) self.source.emit("") if self.author.nonempty(): self.source.emit(`self.author`) if self.AS: self.source.emit("") self.source.need_paragraph() caller.interpret_block(self.AS) self.source.end_paragraph(label="AS") self.source.emit("") self.source.emit("") # Ordinary formatting comands. if cmd == "AE": pass # Already handled by AS elif cmd == "AL" or cmd == "RL": enumeration = 'arabic' spacing = 'normal' if args: spec = MmInterpreter.markdict.get(args[0]) if not spec: self.source.error("unknown enumeration type %s in AL" % args[0]) else: enumeration = spec if len(args) >= 3: spacing = 'compact' self.source.emit("" % (enumeration, spacing)) self.liststack.append("") self.listcount.append(0) elif cmd == "APP" or cmd == "APPSK": name = args[0] text = args[1 + (cmd == "APPSK")] self.source.troff.strings["Apptxt"] = " ".join(text) self.source.emit("%s" % name) elif cmd == "AS": self.source.emit("") self.source.need_paragraph() elif cmd == "B1": self.source.begin_block("sidebar", remap="B1") elif cmd == "B2": self.source.end_block(r"sidebar", remap="B2") elif cmd == "BE": self.source.paragraph("End of BS/BE block") elif cmd == "BL" or cmd == "ML": if len(args) == 2: spacing = 'compact' else: spacing = 'normal' self.source.emit("" % spacing) self.liststack.append("") self.listcount.append(0) elif cmd == "BS": self.source.warning("BS/BE block may need to be moved, see FIXME") self.source.paragraph("FIXME: BS/BE block may need to be moved") elif cmd == "BVL" or cmd == "VL": self.source.emit("") self.liststack.append("") self.listcount.append(0) elif cmd == "DE": self.source.end_block("literallayout", remap="DE") elif cmd == "DL": if len(args) == 2: spacing = 'compact' else: spacing = 'normal' self.source.emit("" % spacing) self.liststack.append("") elif cmd == "DS" or cmd == "DF": self.source.begin_block("literallayout", remap=cmd) elif cmd == "FE": self.source.end_block("footnote", remap="FE") elif cmd == "FS": self.source.begin_block("footnote", remap="FE") elif cmd == "H": for level in self.liststack: self.end_list() level = int(args[0]) heading_text = heading_suffix = "" if 
len(args) > 1: heading_text = args[1] if len(args) > 2: heading_suffix = args[1] self.source.push_section(level, heading_text + heading_suffix) elif cmd == "HU": heading_text = args[0] for level in self.liststack: self.end_list() self.source.push_section(self.source.sectiondepth, heading_text, makeid=0) # We can ignore H[XYZ] as they are user-defined exits elif cmd == "IA": self.source.emit("") elif cmd == "IND": self.source.pushline(self.source.index(map(deemphasize, args))) elif cmd == "LB": itype = int(args[3]) mark = "1" if len(args) > 4: mark = args[4] if itype == 0: # Not strictly correct -- what LB really wants us to do # is generate a mark from the mark argument. self.source.emit("" % spacing) self.liststack.append("") else: spec = MmInterpreter.markdict.get(mark) if not spec: self.source.error("unknown enumeration type %s in LB"%mark) enumeration = 'arabic' else: enumeration = spec self.source.emit("" % enumeration) self.liststack.append("") self.listcount.append(0) elif cmd == "LC": for level in self.liststack: self.end_list() elif cmd == "LE": self.end_list() elif cmd == "LI": mark = "" if len(args) > 0: mark = args[0] # FIXME: process second argument # End previous entry if self.listcount[-1]: self.source.end_paragraph(label="LI") self.source.emit("") if self.liststack[-1] == "": self.source.emit("") # Begin this entry if self.liststack[-1] == "": self.source.emit("") self.source.emit("%s" % fontclose(mark)) self.source.emit("") self.source.need_paragraph() # Bump counter self.listcount[-1] += 1 elif cmd == "P" or cmd == "blank": self.source.paragraph() elif cmd == "RF": self.source.emit("Reference end -->") elif cmd == "SM": if len(args) > 2: self.source.pushline(r"%s\fS%s\fP%s" % args) else: self.source.pushline(r"\fS%s\fP%s" % args) # We can ignore user exits, TP, TX, TY. elif cmd == "VERBOFF": self.source.end_block("literallayout", remap='VERBOFF') elif cmd == "VERBON": self.source.begin_block("literallayout", remap='VERBON') elif cmd == "WA": self.source.emit("") # Unknown command. else: return False return True def preprocess(self, text): return text def postprocess(self, text): return text class MwwwInterpreter: "Interpret mwww(7) macros." name = "mwww" exclusive = False toptag = "article" immutable_set = set(["HX", "BCL", "BGIMG", "URL", "MTO", "FTP", "IMG", "HTML", "TAG", "HR",]) ignore_set = set(["HX", "BCL", "BGIMG", "HTML", "HR", "LK", "NHR", "HnS", "HnE", "DC", "HTL", ]) complain_set = set([]) parabreak_set = set([]) sectionbreak_set = set([]) listbreak_set = set([]) translations = {} reductions = {} def __init__(self, source, verbose=0): self.source = source self.verbose = verbose def interpret(self, dummy_line, tokens, dummy_caller): cmd = tokens[0][1:] args = tokens[1:] if len(args) == 1: args.append("") if len(args) == 2: args.append("") def make_url(url, txt, after): return '%s%s' % (url,txt,after) # Ordinary formatting comands. 
if cmd == "URL": self.source.pushline(make_url(args[0], args[1], args[2])) elif cmd == "MTO": self.source.pushline(make_url(args[0], "mailto:"+args[1], args[2])) elif cmd == "FTP": self.source.pushline(make_url(args[0], args[1], args[2])) elif cmd == "IMG": ifile = args[1] self.source.pushline('\n\n' % ifile) elif cmd == "PIMG": ifile = args[1] self.source.pushline('\n\n' % ifile) elif cmd == "TAG": if self.source.docbook5: self.source.pushline('' % (self.source.make_id_from_title(args[0]),)) else: self.source.pushline('' % (self.source.make_id_from_title(args[0]),)) elif cmd == "ULS": self.source.pushline("") elif cmd == "ULE": self.source.pushline("") elif cmd == "LI": self.source.error("LI is not yet supported, because it's not documented.") # Unknown command. else: return False return True def preprocess(self, text): return text def postprocess(self, text): return text # This is how we autodetect the right macro set: interpreter_dispatch = { "pp": MeInterpreter, "Dt": MdocInterpreter, "Dd": MdocInterpreter, "Nm": MdocInterpreter, "AU": MsInterpreter, "NH": MsInterpreter, "TH": ManInterpreter, "MT": MmInterpreter, "SA": MmInterpreter, "COVER": MmInterpreter, # Extension macro sets "supplemental macros used in Tcl/Tk": TkManInterpreter, "BS": TkManInterpreter, "the F register is turned on": Pod2ManInterpreter, "ZN": XManInterpreter, "Pn": XManInterpreter, "ny0": XManInterpreter, "reStructuredText": reStructuredTextInterpreter, "reStructeredText": reStructuredTextInterpreter, "DocBook XSL Stylesheets" : DocBookInterpreter, "pdfdest" : FoojzsInterpreter, "H0": ASTInterpreter, # These are all of the supported Mwww tags "URL": MwwwInterpreter, "FTP": MwwwInterpreter, "MTO": MwwwInterpreter, "PIMG": MwwwInterpreter, "IMG": MwwwInterpreter, "TAG": MwwwInterpreter, } mso_dispatch = { "e.tmac": MeInterpreter, "doc.tmac": MdocInterpreter, "s.tmac": MsInterpreter, "an.tmac": ManInterpreter, "m.tmac": MmInterpreter, "www.tmac": MwwwInterpreter, } required_extensions = { MeInterpreter: "me", MsInterpreter: "ms", MmInterpreter: "mm", } # # Invocation machinery starts here # def transfile(name, arguments, translate_data, trans_filename=None): "Read input sources entire and transform them in memory." 
if not arguments: outdoc = translate_data(name, "stdin", sys.stdin.read(), False) if outdoc: stdout.write(outdoc) else: for ifile in arguments: infp = open(ifile) indoc = infp.read() infp.close() tmpfile = ifile + ".~%s-%d~" % (name, os.getpid()) try: outfp = open(tmpfile, "w") except OSError: stderr.write("%s: can't open tempfile" % name) return True try: outdoc = translate_data(name, ifile, indoc, len(arguments)>1) except: os.remove(tmpfile) # Pass the exception upwards (exc_type, exc_value, exc_traceback) = sys.exc_info() raise exc_type, exc_value, exc_traceback if outdoc == indoc: os.remove(tmpfile) if outdoc is None: continue else: outfp.write(outdoc) outfp.close() # under Windows you can't rename an open file if not trans_filename: os.rename(tmpfile, ifile) elif type(trans_filename) == type(""): os.rename(tmpfile, ifile + trans_filename) else: os.rename(tmpfile, trans_filename(ifile)) stdout = sys.stdout stderr = sys.stderr pretty = pprint.PrettyPrinter(indent=4) globalhints = SemanticHintsRegistry() def main(args, dummy_mainout=stdout, mainerr=stderr): #global globalhints, pretty import getopt (options, arguments) = getopt.getopt(args, "d:e:D:I:h:qsxvwV") includepath = ["."] hintfile = None quiet = False portability = 0 docbook5 = False verbosity_level = 0 verbosity = None encoding = "ISO-8859-1" for (switch, val) in options: if switch == "-d": verbosity = val elif switch == "-e": encoding = val elif switch == "-D": apply(globalhints.post, val.split("=")) elif switch == "-I": includepath = val.split(":") elif switch == '-h': hintfile = val elif switch == '-q': quiet += 1 elif switch == '-x': docbook5 += 1 elif switch == '-v': verbosity_level += 1 elif switch == '-w': portability += 1 elif switch == '-V': sys.stdout.write("doclifter version %s\n" % version) sys.exit(0) if not verbosity: verbosity = "gpscmibz"[:verbosity_level] try: lifter = DocLifter(verbosity, quiet, portability, includepath, encoding, docbook5) transfile("doclifter", arguments, lifter, ".xml") if hintfile: fp = open(hintfile, "w") fp.write(str(globalhints)) fp.close() return 0 except LiftException, e: mainerr.write("doclifter: " + e.message + "\n") return e.retval except IOError, e: mainerr.write("doclifter: file I/O error: %s\n" % e) return 3 except KeyboardInterrupt: mainerr.write("doclifter: bailing out...\n") return 5 except: if verbosity: (exc_type, exc_value, exc_traceback) = sys.exc_info() raise exc_type, exc_value, exc_traceback else: mainerr.write("doclifter: internal error\n") return 4 if __name__ == "__main__": # Run the main sequence sys.exit(main(sys.argv[1:])) # The following sets edit modes for GNU EMACS # Local Variables: # mode:python # End: doclifter-2.11/TODO0000664000175000017500000000025612152465736012264 0ustar esresrBugs: * Docbook 5 translation is incomplete; inclusions won't work, entities are untested. See the PATCHES file for other, more minor problems mainly due to bad markup. 
doclifter-2.11/Makefile0000664000175000017500000000402112152465736013226 0ustar esresr# Makefile for the doclifter project VERSION=$(shell sed $(MANDIR)/doclifter.1.gz rm doclifter.1 doclifter.1: doclifter.xml xmlto man doclifter.xml manlifter.1: manlifter.xml xmlto man manlifter.xml doclifter.html: doclifter.xml xmlto xhtml-nochunks doclifter.xml manlifter.html: manlifter.xml xmlto xhtml-nochunks manlifter.xml doclifter-$(VERSION).tar.gz: $(SOURCES) mkdir doclifter-$(VERSION) cp -r $(SOURCES) doclifter-$(VERSION) tar -czf doclifter-$(VERSION).tar.gz doclifter-$(VERSION) rm -fr doclifter-$(VERSION) ls -l doclifter-$(VERSION).tar.gz doclifter-$(VERSION).md5: doclifter-$(VERSION).tar.gz @md5sum doclifter-$(VERSION).tar.gz >doclifter-$(VERSION).md5 check: @cd tests >/dev/null; make --quiet PYLINTOPTS = --rcfile=/dev/null --reports=n --include-ids=y --disable="C0103,C0111,C0301,C0302,C0322,C0321,C0324,W0402,W0511,W0141,W0231,W0333,W0631,R0201,R0911,R0912,R0914,R0902,R0903,R0904,R0913,R0914,R0915" pylint: @pylint --output-format=parseable $(PYLINTOPTS) doclifter pychecker: @echo "Expect 4 warnings." @ln -f doclifter doclifter.py @-pychecker --only --quiet --limit 50 doclifter.py @rm -f doclifter.py doclifter.pyc dist: doclifter-$(VERSION).tar.gz clean: rm -f doclifter.html manlifter.html doclifter.1 manlifter.1 rm -f *.pyc docliftertest.xml foobar* fixed* *~ bugs.html rm -f SHIPPER.* index.html *.tar.gz *.md5 release: doclifter-$(VERSION).tar.gz doclifter-$(VERSION).md5 doclifter.html manlifter.html shipper -u -m -t; make clean # This is used only for updating the bugs page on my website. # It won't work for anyone else. update: problemgen.py >bugs.html scp -r bugs.html prepatch/ login.ibiblio.org:/public/html/catb/esr/doclifter doclifter-2.11/README0000664000175000017500000000162512152465736012455 0ustar esresr doclifter The doclifter program translates documents written in troff macros to DocBook. Lifting documents from presentation level to semantic level is hard, and a really good job requires human polishing. This tool aims to do everything that can be mechanized, and to preserve any troff-level information that might have structural implications in XML comments. This tool does the hard parts. TBL tables are translated into DocBook table markup, PIC into SVG, and EQN into MathML (relying on pic2svg and GNU eqn for the last two). Test loads are included in the distribution; do "make check" to run the regression tests. The code has been live tested in about the most brutal possible way; it has been run against every single man page in all sections of a desktop Ubuntu installation. It lifts over 93% of these pages without requiring any hand-hacking. There is a detailed change log in the NEWS file. 
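If you just want to see what doclifter produces for a single page, an illustrative invocation (the page name foo.1 is hypothetical, and this assumes you run doclifter from the source directory or have it on your PATH) is:

    doclifter foo.1

On success this leaves the DocBook translation beside the source as foo.1.xml. Add -v for a more verbose log, -x to emit DocBook 5, or -V to print the version and exit.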
doclifter-2.11/manlifter.10000664000175000017500000001734712152465736013640 0ustar esresr'\" t .\" Title: manlifter .\" Author: [see the "Author" section] .\" Generator: DocBook XSL Stylesheets v1.76.1 .\" Date: 06/01/2013 .\" Manual: Documentation Tools .\" Source: manlifter .\" Language: English .\" .TH "MANLIFTER" "1" "06/01/2013" "manlifter" "Documentation Tools" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" http://bugs.debian.org/507673 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" manlifter \- mass\-conversion script and test harness for doclifter .SH "SYNOPSIS" .HP \w'\fBmanlifter\fR\ 'u \fBmanlifter\fR [\-d\ \fIoption\fR] [\-e] [\-f\ \fIlistfile\fR] [\-h] [\-I\ \fImandir\fR] [\-m] [\-M] [\-o\ \fIoutdir\fR] [\-p\ \fIpatch\-directory\fR] [\-P] [\-q] [\-v] [\-s\ \fIsection\fR] [\-X\ \fIexclude\fR] \fIname\fR... .HP \w'\fBmanlifter\fR\ 'u \fBmanlifter\fR [\-S] .SH "DESCRIPTION" .PP \fBmanlifter\fR is a script that sequences \fBdoclifter\fR(1) to convert an entire manual\-page tree to XML\-DocBook, optionally also generating HTML from the XML\&. Another use is as a torture\-test tool for doclifter; it logs errors to standard output and collects timings\&. .PP Called without any file arguments, manlifter tries to convert all eligible man pages installed on the system, placing the resulting XML files under xmlman in the current directory\&. Each successfully translated page foo\&.N is copied to manN/foo\&.xml beneath the output directory, regardless of what source directory it came from\&. .PP A manual page is considered ineligible for batch conversion if it contains text indicating it has been generated from DocBook masters or from Doxygen\&. .PP For each source file examined, if the destination file exists and is newer than the source, the conversion is skipped; thus, incremental runs of \fBmanlifter\fR do the least work needed to keep the target XML tree up to date\&. Likewise, in \-h mode derived HTML files are only made when necessary\&. .PP Stub pages that are just \fB\&.so\fR redirections are translated to corresponding symlinks of XML files (and, with \-h, HTML files)\&. .PP \fBmanlifter\fR may also be called with a single file argument, which is interpreted as the stem name of a potential manual page\&. \fBmanlifter\fR then searches all selected manual sections for a matching page and attempts to convert it\&. In this case, a copy of the man page and the converted version are dropped immediately beneath the output directory, with the names foobar\&.man and foobar\&.man\&.xml, respectively\&. This mode is normally of interest only to \fBdoclifter\fR developers for debugging that program\&.
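.PP
Thus, for example, \fBmanlifter \-s 1 foobar\fR would search only section 1 for a page matching \fIfoobar\fR and, if a match is found, drop foobar\&.man and foobar\&.man\&.xml beneath the output directory\&.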
.PP In either of the above cases, \fBmanlifter\fR will uncompress the file if it has a \&.gz, \&.bz2 or \&.Z suffix on the name\&. .PP Options are as follows: .PP \-d .RS 4 Pass the string argument to each doclifter call as options\&. Each space\-separated token in the string becomes a separate argument in the call\&. .RE .PP \-e .RS 4 Run in log\-filter mode (mainly of interest to \fBdoclifter\fR developers)\&. In this mode, \fBmanlifter\fR reads a test log from standard input and filters it in a way dependent on the \-f and \-q options\&. If neither of these is given, messages from successful runs are stripped out and only errors are passed through to standard output\&. .RE .PP \-f .RS 4 Normally, run doclifter on the files named by each line in the argument file\&. In error\-filter mode the argument is instead interpreted as a filtering regular expression\&. .RE .PP \-h .RS 4 Also generate HTML translations into the output directory\&. DocBook citerefentry markup is transformed to hyperlinks in the directory, and a contents listing is generated to index\&.html\&. .RE .PP \-I .RS 4 Specify the root of the manual\-page tree\&. By default this is /usr/share/man\&. .RE .PP \-m .RS 4 Make a patch to correct the last page fetched\&. It is copied, an editor is called on the copy (using the environment variable \fB$EDITOR\fR), and then \fBdiff\fR(1) is called to drop the patch in the prepatch directory\&. Fails with an error if such a patch is already present\&. .RE .PP \-M .RS 4 Lift the specified files, then do the equivalent of the \-m option\&. .RE .PP \-o .RS 4 Set the output directory into which XML\-DocBook translations will be dropped\&. By default this is xmlman under the current directory in batch mode, or the current directory otherwise\&. .RE .PP \-p .RS 4 Interpret the argument as the name of a patch directory (the default name is prepatch under the current directory)\&. Each file named foo\&.N\&.patch is interpreted as a patch to be applied to the manual page foo(N) before doclifter translates it\&. .RE .PP \-P .RS 4 Enable profiling using the Python hotshot module; this is only useful for tuning doclifter so it runs faster\&. Raw data is written to manlifter\&.prof, and a digested report is appended to the log on standard output\&. Warning: the raw data files can become huge, and the postprocessing for report generation can take as long as the actual processing (or longer!)\&. .RE .PP \-q .RS 4 Normally, pass the \-q (quiet) option to each doclifter call\&. In error\-filter mode, return a list of files on which translation failed\&. .RE .PP \-v .RS 4 Pass the \-v (verbose) option to each doclifter call\&. This option can be repeated to increase the verbosity level\&. .RE .PP \-s .RS 4 Specify a section to scan\&. Use this with an argument; it should not be necessary when doing a conversion of the entire tree\&. .RE .PP \-S .RS 4 Compile error statistics from a \fBmanlifter\fR logfile presented on standard input\&. This option will be of interest mainly to \fBdoclifter\fR developers\&. .RE .PP \-X .RS 4 In batch mode exclude pages listed in the argument file\&. Meant to be used for pages that are known good and take an extremely long time to lift, in order to cut down the time for a test run\&. (Most pages lift in less than a half second, but a few can take 15 minutes or longer\&.) .RE .PP \fBmanlifter\fR emits a logfile to standard output\&. The file begins with a timestamp line and a blank line, and ends with a line giving run time and various interesting statistics\&.
Between these are stanzas, separated by blank lines, one for each file on which \fBdoclifter\fR was run\&. .PP The first line of each stanza begins with "! ", followed by the pathname of the source manual page, followed by "=" and the return status of doclifter run on that file\&. Following that is a space and \fBdoclifter\fR\*(Aqs runtime in seconds\&. .PP This initial line may be followed by information messages and the error output of the doclifter run\&. .PP \fBmanlifter\fR must find a copy of \fBdoclifter\fR in either the current directory or one of the command directories in your \fBPATH\fR in order to run\&. .SH "BUGS" .PP HTML generation is painfully slow\&. Unfortunately, there is little we can do to remedy this, because XSLT engines are painfully slow\&. .SH "SEE ALSO" .PP \fBdoclifter\fR(1), \fBxmlto\fR(1) .SH "AUTHOR" .PP Eric S\&. Raymond esr@thyrsus\&.com .PP There is a project web page at \m[blue]\fBhttp://www\&.catb\&.org/~esr/doclifter/\fR\m[]\&.