pax_global_header00006660000000000000000000000064126013122710014505gustar00rootroot0000000000000052 comment=c955161e575a01ec6181148914d8c3de152170fc
tar-split-0.9.10/000077500000000000000000000000001260131227100135135ustar00rootroot00000000000000
tar-split-0.9.10/.travis.yml000066400000000000000000000003711260131227100156250ustar00rootroot00000000000000
language: go
go:
  - tip
  - 1.5.1
  - 1.4.3
  - 1.3.3
  - 1.2.2

# let us have pretty, fast Docker-based Travis workers!
sudo: false

install:
  - go get -d ./...
  - go get golang.org/x/tools/cmd/vet

script:
  - go test -v ./...
  - go vet ./...
tar-split-0.9.10/LICENSE000066400000000000000000000020631260131227100145210ustar00rootroot00000000000000
Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
tar-split-0.9.10/README.md000066400000000000000000000074141260131227100150000ustar00rootroot00000000000000
# tar-split

[![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split)

Pristinely disassembling a tar archive, and stashing the raw bytes and offsets needed to reassemble a validating original archive.

## Docs

Code API for libraries provided by `tar-split`:

* https://godoc.org/github.com/vbatts/tar-split/tar/asm
* https://godoc.org/github.com/vbatts/tar-split/tar/storage
* https://godoc.org/github.com/vbatts/tar-split/archive/tar

## Install

The command line utility is installable via:

```bash
go get github.com/vbatts/tar-split/cmd/tar-split
```

## Usage

For cli usage, see its [README.md](cmd/tar-split/README.md). For the library, see the [docs](#docs).

## Caveat

Eventually this should detect TARs for which this is not possible. For example, stored sparse files that have "holes" in them will be read as a contiguous file, though the archive contents may be recorded in sparse format. Therefore, when adding the file payload to a reassembled tar, the file payload would need to be precisely re-sparsified to achieve identical output. This is not something I seek to fix immediately; I would rather have an alert that precise reassembly is not possible. (see http://www.gnu.org/software/tar/manual/html_node/Sparse-Formats.html for more)

Another caveat: while tar archives support having multiple file entries for the same path, we will not support this feature. If there is more than one entry with the same path, expect an error (like `ErrDuplicatePath`) or a resulting tar stream that does not validate against your original checksum/signature.
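## Library Example

To make the library side of the round trip above concrete, here is a minimal sketch of the disassembly half, assuming the `asm` and `storage` APIs documented at the godoc links in [Docs](#docs) (`asm.NewInputTarStream`, `storage.NewJSONPacker`, `storage.NewDiscardFilePutter`); treat it as illustrative rather than canonical:

```go
package main

import (
	"io"
	"io/ioutil"
	"os"

	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

func main() {
	// Open an archive and stream it through the disassembler. The packer
	// records the metadata (raw headers, padding, payload checksums) as
	// JSON entries on stdout; the file payloads are discarded here, but a
	// real FilePutter would stash them for later reassembly.
	f, err := os.Open("tar-split.tar")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	its, err := asm.NewInputTarStream(f, storage.NewJSONPacker(os.Stdout), storage.NewDiscardFilePutter())
	if err != nil {
		panic(err)
	}
	// The returned reader replays the original archive bytes unchanged,
	// so it can be checksummed or forwarded while being recorded.
	if _, err := io.Copy(ioutil.Discard, its); err != nil {
		panic(err)
	}
}
```

The counterpart, `asm.NewOutputTarStream`, walks the recorded entries with an `Unpacker` and a `FileGetter` to reproduce the archive byte for byte.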
## Contract

Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstream mergeable solution).

## Std Version

The version of the golang stdlib `archive/tar` is from go1.4.1, plus their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f). It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream.

## Design

See the [design](concept/DESIGN.md).

## Stored Metadata

Since the raw bytes of the headers and padding are stored, you may be wondering what the size implications are. The headers are at least 512 bytes per file (sometimes more), at least 1024 null bytes on the end, and then various padding. With a naive storage implementation, this makes for linear growth of the stored metadata with the number of files.

First we'll get an archive to work with. For repeatability, we'll make an archive from what you've just cloned:

```bash
git archive --format=tar -o tar-split.tar HEAD .
```

```bash
$ go get github.com/vbatts/tar-split/cmd/tar-split
$ tar-split checksize ./tar-split.tar
inspecting "tar-split.tar" (size 210k)
 -- number of files: 50
 -- size of metadata uncompressed: 53k
 -- size of gzip compressed metadata: 3k
```

So, assuming you've managed the extraction of the archive yourself and can reuse the file payloads from a relative path, the only additional storage implication is as little as 3kb.

But let's look at a larger archive, with many files.

```bash
$ ls -sh ./d.tar
1.4G ./d.tar
$ tar-split checksize ~/d.tar
inspecting "/home/vbatts/d.tar" (size 1420749k)
 -- number of files: 38718
 -- size of metadata uncompressed: 43261k
 -- size of gzip compressed metadata: 2251k
```

Here, an archive with 38,718 files has a compressed metadata footprint of about 2mb. Folding the null bytes at the end of the archive into the per-file figures, we can assume the following bytes-per-file rates for the storage implications:

| uncompressed | compressed |
| :----------: | :--------: |
| ~ 1kb per/file | 0.06kb per/file |

## What's Next?

* More implementations of storage Packer and Unpacker
* More implementations of FileGetter and FilePutter
* it would be interesting to have an assembler stream that implements `io.Seeker`

## License

See [LICENSE](LICENSE)
tar-split-0.9.10/archive/000077500000000000000000000000001260131227100151345ustar00rootroot00000000000000
tar-split-0.9.10/archive/tar/000077500000000000000000000000001260131227100157225ustar00rootroot00000000000000
tar-split-0.9.10/archive/tar/common.go000066400000000000000000000203171260131227100175440ustar00rootroot00000000000000
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package tar implements access to tar archives.
// It aims to cover most of the variations, including those produced
// by GNU and BSD tars.
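//
// This tar-split fork additionally records the raw, unparsed bytes of each
// header and its padding (see Reader.RawAccounting and Reader.RawBytes in
// reader.go), so that an archive can later be reassembled bit for bit.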
// // References: // http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 // http://www.gnu.org/software/tar/manual/html_node/Standard.html // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html package tar import ( "bytes" "errors" "fmt" "os" "path" "time" ) const ( blockSize = 512 // Types TypeReg = '0' // regular file TypeRegA = '\x00' // regular file TypeLink = '1' // hard link TypeSymlink = '2' // symbolic link TypeChar = '3' // character device node TypeBlock = '4' // block device node TypeDir = '5' // directory TypeFifo = '6' // fifo node TypeCont = '7' // reserved TypeXHeader = 'x' // extended header TypeXGlobalHeader = 'g' // global extended header TypeGNULongName = 'L' // Next file has a long name TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name TypeGNUSparse = 'S' // sparse file ) // A Header represents a single header in a tar archive. // Some fields may not be populated. type Header struct { Name string // name of header file entry Mode int64 // permission and mode bits Uid int // user id of owner Gid int // group id of owner Size int64 // length in bytes ModTime time.Time // modified time Typeflag byte // type of header entry Linkname string // target name of link Uname string // user name of owner Gname string // group name of owner Devmajor int64 // major number of character or block device Devminor int64 // minor number of character or block device AccessTime time.Time // access time ChangeTime time.Time // status change time Xattrs map[string]string } // File name constants from the tar spec. const ( fileNameSize = 100 // Maximum number of bytes in a standard tar name. fileNamePrefixSize = 155 // Maximum number of ustar extension bytes. ) // FileInfo returns an os.FileInfo for the Header. func (h *Header) FileInfo() os.FileInfo { return headerFileInfo{h} } // headerFileInfo implements os.FileInfo. type headerFileInfo struct { h *Header } func (fi headerFileInfo) Size() int64 { return fi.h.Size } func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } func (fi headerFileInfo) Sys() interface{} { return fi.h } // Name returns the base name of the file. func (fi headerFileInfo) Name() string { if fi.IsDir() { return path.Base(path.Clean(fi.h.Name)) } return path.Base(fi.h.Name) } // Mode returns the permission and mode bits for the headerFileInfo. func (fi headerFileInfo) Mode() (mode os.FileMode) { // Set file permission bits. mode = os.FileMode(fi.h.Mode).Perm() // Set setuid, setgid and sticky bits. if fi.h.Mode&c_ISUID != 0 { // setuid mode |= os.ModeSetuid } if fi.h.Mode&c_ISGID != 0 { // setgid mode |= os.ModeSetgid } if fi.h.Mode&c_ISVTX != 0 { // sticky mode |= os.ModeSticky } // Set file mode bits. // clear perm, setuid, setgid and sticky bits. 
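// Masking with &^ 07777 clears the low twelve bits (the permission,
// setuid, setgid and sticky bits handled above), leaving only the
// file-type bits for the comparisons below; e.g. a regular file's
// 0100644 &^ 07777 == 0100000 == c_ISREG.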
m := os.FileMode(fi.h.Mode) &^ 07777 if m == c_ISDIR { // directory mode |= os.ModeDir } if m == c_ISFIFO { // named pipe (FIFO) mode |= os.ModeNamedPipe } if m == c_ISLNK { // symbolic link mode |= os.ModeSymlink } if m == c_ISBLK { // device file mode |= os.ModeDevice } if m == c_ISCHR { // Unix character device mode |= os.ModeDevice mode |= os.ModeCharDevice } if m == c_ISSOCK { // Unix domain socket mode |= os.ModeSocket } switch fi.h.Typeflag { case TypeSymlink: // symbolic link mode |= os.ModeSymlink case TypeChar: // character device node mode |= os.ModeDevice mode |= os.ModeCharDevice case TypeBlock: // block device node mode |= os.ModeDevice case TypeDir: // directory mode |= os.ModeDir case TypeFifo: // fifo node mode |= os.ModeNamedPipe } return mode } // sysStat, if non-nil, populates h from system-dependent fields of fi. var sysStat func(fi os.FileInfo, h *Header) error // Mode constants from the tar spec. const ( c_ISUID = 04000 // Set uid c_ISGID = 02000 // Set gid c_ISVTX = 01000 // Save text (sticky bit) c_ISDIR = 040000 // Directory c_ISFIFO = 010000 // FIFO c_ISREG = 0100000 // Regular file c_ISLNK = 0120000 // Symbolic link c_ISBLK = 060000 // Block special file c_ISCHR = 020000 // Character special file c_ISSOCK = 0140000 // Socket ) // Keywords for the PAX Extended Header const ( paxAtime = "atime" paxCharset = "charset" paxComment = "comment" paxCtime = "ctime" // please note that ctime is not a valid pax header. paxGid = "gid" paxGname = "gname" paxLinkpath = "linkpath" paxMtime = "mtime" paxPath = "path" paxSize = "size" paxUid = "uid" paxUname = "uname" paxXattr = "SCHILY.xattr." paxNone = "" ) // FileInfoHeader creates a partially-populated Header from fi. // If fi describes a symlink, FileInfoHeader records link as the link target. // If fi describes a directory, a slash is appended to the name. // Because os.FileInfo's Name method returns only the base name of // the file it describes, it may be necessary to modify the Name field // of the returned header to provide the full path name of the file. func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { if fi == nil { return nil, errors.New("tar: FileInfo is nil") } fm := fi.Mode() h := &Header{ Name: fi.Name(), ModTime: fi.ModTime(), Mode: int64(fm.Perm()), // or'd with c_IS* constants later } switch { case fm.IsRegular(): h.Mode |= c_ISREG h.Typeflag = TypeReg h.Size = fi.Size() case fi.IsDir(): h.Typeflag = TypeDir h.Mode |= c_ISDIR h.Name += "/" case fm&os.ModeSymlink != 0: h.Typeflag = TypeSymlink h.Mode |= c_ISLNK h.Linkname = link case fm&os.ModeDevice != 0: if fm&os.ModeCharDevice != 0 { h.Mode |= c_ISCHR h.Typeflag = TypeChar } else { h.Mode |= c_ISBLK h.Typeflag = TypeBlock } case fm&os.ModeNamedPipe != 0: h.Typeflag = TypeFifo h.Mode |= c_ISFIFO case fm&os.ModeSocket != 0: h.Mode |= c_ISSOCK default: return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) } if fm&os.ModeSetuid != 0 { h.Mode |= c_ISUID } if fm&os.ModeSetgid != 0 { h.Mode |= c_ISGID } if fm&os.ModeSticky != 0 { h.Mode |= c_ISVTX } // If possible, populate additional fields from OS-specific // FileInfo fields. if sys, ok := fi.Sys().(*Header); ok { // This FileInfo came from a Header (not the OS). Use the // original Header to populate all remaining fields. 
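// Copying these fields verbatim keeps Header -> FileInfo -> Header a
// lossless round trip; the header round-trip tests in tar_test.go rely
// on Sys() handing back the original *Header.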
h.Uid = sys.Uid h.Gid = sys.Gid h.Uname = sys.Uname h.Gname = sys.Gname h.AccessTime = sys.AccessTime h.ChangeTime = sys.ChangeTime if sys.Xattrs != nil { h.Xattrs = make(map[string]string) for k, v := range sys.Xattrs { h.Xattrs[k] = v } } if sys.Typeflag == TypeLink { // hard link h.Typeflag = TypeLink h.Size = 0 h.Linkname = sys.Linkname } } if sysStat != nil { return h, sysStat(fi, h) } return h, nil } var zeroBlock = make([]byte, blockSize) // POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. // We compute and return both. func checksum(header []byte) (unsigned int64, signed int64) { for i := 0; i < len(header); i++ { if i == 148 { // The chksum field (header[148:156]) is special: it should be treated as space bytes. unsigned += ' ' * 8 signed += ' ' * 8 i += 7 continue } unsigned += int64(header[i]) signed += int64(int8(header[i])) } return } type slicer []byte func (sp *slicer) next(n int) (b []byte) { s := *sp b, *sp = s[0:n], s[n:] return } func isASCII(s string) bool { for _, c := range s { if c >= 0x80 { return false } } return true } func toASCII(s string) string { if isASCII(s) { return s } var buf bytes.Buffer for _, c := range s { if c < 0x80 { buf.WriteByte(byte(c)) } } return buf.String() } tar-split-0.9.10/archive/tar/example_test.go000066400000000000000000000031731260131227100207470ustar00rootroot00000000000000// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar_test import ( "archive/tar" "bytes" "fmt" "io" "log" "os" ) func Example() { // Create a buffer to write our archive to. buf := new(bytes.Buffer) // Create a new tar archive. tw := tar.NewWriter(buf) // Add some files to the archive. var files = []struct { Name, Body string }{ {"readme.txt", "This archive contains some text files."}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, {"todo.txt", "Get animal handling licence."}, } for _, file := range files { hdr := &tar.Header{ Name: file.Name, Mode: 0600, Size: int64(len(file.Body)), } if err := tw.WriteHeader(hdr); err != nil { log.Fatalln(err) } if _, err := tw.Write([]byte(file.Body)); err != nil { log.Fatalln(err) } } // Make sure to check the error on Close. if err := tw.Close(); err != nil { log.Fatalln(err) } // Open the tar archive for reading. r := bytes.NewReader(buf.Bytes()) tr := tar.NewReader(r) // Iterate through the files in the archive. for { hdr, err := tr.Next() if err == io.EOF { // end of tar archive break } if err != nil { log.Fatalln(err) } fmt.Printf("Contents of %s:\n", hdr.Name) if _, err := io.Copy(os.Stdout, tr); err != nil { log.Fatalln(err) } fmt.Println() } // Output: // Contents of readme.txt: // This archive contains some text files. // Contents of gopher.txt: // Gopher names: // George // Geoffrey // Gonzo // Contents of todo.txt: // Get animal handling licence. } tar-split-0.9.10/archive/tar/reader.go000066400000000000000000000622121260131227100175160ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar // TODO(dsymonds): // - pax extensions import ( "bytes" "errors" "io" "io/ioutil" "os" "strconv" "strings" "time" ) var ( ErrHeader = errors.New("archive/tar: invalid tar header") ) const maxNanoSecondIntSize = 9 // A Reader provides sequential access to the contents of a tar archive. 
// A tar archive consists of a sequence of files. // The Next method advances to the next file in the archive (including the first), // and then it can be treated as an io.Reader to access the file's data. type Reader struct { r io.Reader err error pad int64 // amount of padding (ignored) after current file entry curr numBytesReader // reader for current file entry hdrBuff [blockSize]byte // buffer to use in readHeader RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this. rawBytes *bytes.Buffer // last raw bits } // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. // // This call resets the current rawbytes buffer // // Only when RawAccounting is enabled, otherwise this returns nil func (tr *Reader) RawBytes() []byte { if !tr.RawAccounting { return nil } if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) } // if we've read them, then flush them. defer tr.rawBytes.Reset() return tr.rawBytes.Bytes() } // A numBytesReader is an io.Reader with a numBytes method, returning the number // of bytes remaining in the underlying encoded data. type numBytesReader interface { io.Reader numBytes() int64 } // A regFileReader is a numBytesReader for reading file data from a tar archive. type regFileReader struct { r io.Reader // underlying reader nb int64 // number of unread bytes for current file entry } // A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. type sparseFileReader struct { rfr *regFileReader // reads the sparse-encoded file data sp []sparseEntry // the sparse map for the file pos int64 // keeps track of file position tot int64 // total size of the file } // Keywords for GNU sparse files in a PAX extended header const ( paxGNUSparseNumBlocks = "GNU.sparse.numblocks" paxGNUSparseOffset = "GNU.sparse.offset" paxGNUSparseNumBytes = "GNU.sparse.numbytes" paxGNUSparseMap = "GNU.sparse.map" paxGNUSparseName = "GNU.sparse.name" paxGNUSparseMajor = "GNU.sparse.major" paxGNUSparseMinor = "GNU.sparse.minor" paxGNUSparseSize = "GNU.sparse.size" paxGNUSparseRealSize = "GNU.sparse.realsize" ) // Keywords for old GNU sparse headers const ( oldGNUSparseMainHeaderOffset = 386 oldGNUSparseMainHeaderIsExtendedOffset = 482 oldGNUSparseMainHeaderNumEntries = 4 oldGNUSparseExtendedHeaderIsExtendedOffset = 504 oldGNUSparseExtendedHeaderNumEntries = 21 oldGNUSparseOffsetSize = 12 oldGNUSparseNumBytesSize = 12 ) // NewReader creates a new Reader reading from r. func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // Next advances to the next entry in the tar archive. // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) } else { tr.rawBytes.Reset() } } if tr.err == nil { tr.skipUnread() } if tr.err != nil { return hdr, tr.err } hdr = tr.readHeader() if hdr == nil { return hdr, tr.err } // Check for PAX/GNU header. 
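// The typeflags handled below ('x' PAX extended header, 'L' GNU long
// name, 'K' GNU long link) are meta entries describing the *next*
// member rather than members themselves, so each case reads ahead to
// the real header; with RawAccounting enabled, the meta entry's raw
// bytes are carried forward into the following entry's buffer.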
switch hdr.Typeflag { case TypeXHeader: // PAX extended header headers, err := parsePAX(tr) if err != nil { return nil, err } // We actually read the whole file, // but this skips alignment padding tr.skipUnread() if tr.err != nil { return nil, tr.err } hdr = tr.readHeader() if hdr == nil { return nil, tr.err } mergePAX(hdr, headers) // Check for a PAX format sparse file sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) if err != nil { tr.err = err return nil, err } if sp != nil { // Current file is a PAX format GNU sparse file. // Set the current file reader to a sparse file reader. tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} } return hdr, nil case TypeGNULongName: // We have a GNU long name header. Its contents are the real file name. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } var buf []byte if tr.RawAccounting { if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } buf = make([]byte, tr.rawBytes.Len()) copy(buf[:], tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { buf = append(buf, tr.RawBytes()...) if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } hdr.Name = cString(realname) return hdr, err case TypeGNULongLink: // We have a GNU long link header. realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } var buf []byte if tr.RawAccounting { if _, err = tr.rawBytes.Write(realname); err != nil { return nil, err } buf = make([]byte, tr.rawBytes.Len()) copy(buf[:], tr.RawBytes()) } hdr, err := tr.Next() // since the above call to Next() resets the buffer, we need to throw the bytes over if tr.RawAccounting { buf = append(buf, tr.RawBytes()...) if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } hdr.Linkname = cString(realname) return hdr, err } return hdr, tr.err } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to // be treated as a regular file. func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { var sparseFormat string // Check for sparse format indicators major, majorOk := headers[paxGNUSparseMajor] minor, minorOk := headers[paxGNUSparseMinor] sparseName, sparseNameOk := headers[paxGNUSparseName] _, sparseMapOk := headers[paxGNUSparseMap] sparseSize, sparseSizeOk := headers[paxGNUSparseSize] sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] // Identify which, if any, sparse format applies from which PAX headers are set if majorOk && minorOk { sparseFormat = major + "." + minor } else if sparseNameOk && sparseMapOk { sparseFormat = "0.1" } else if sparseSizeOk { sparseFormat = "0.0" } else { // Not a PAX format GNU sparse file. 
return nil, nil } // Check for unknown sparse format if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { return nil, nil } // Update hdr from GNU sparse PAX headers if sparseNameOk { hdr.Name = sparseName } if sparseSizeOk { realSize, err := strconv.ParseInt(sparseSize, 10, 0) if err != nil { return nil, ErrHeader } hdr.Size = realSize } else if sparseRealSizeOk { realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) if err != nil { return nil, ErrHeader } hdr.Size = realSize } // Set up the sparse map, according to the particular sparse format in use var sp []sparseEntry var err error switch sparseFormat { case "0.0", "0.1": sp, err = readGNUSparseMap0x1(headers) case "1.0": sp, err = readGNUSparseMap1x0(tr.curr) } return sp, err } // mergePAX merges well known headers according to PAX standard. // In general headers with the same name as those found // in the header struct overwrite those found in the header // struct with higher precision or longer values. Esp. useful // for name and linkname fields. func mergePAX(hdr *Header, headers map[string]string) error { for k, v := range headers { switch k { case paxPath: hdr.Name = v case paxLinkpath: hdr.Linkname = v case paxGname: hdr.Gname = v case paxUname: hdr.Uname = v case paxUid: uid, err := strconv.ParseInt(v, 10, 0) if err != nil { return err } hdr.Uid = int(uid) case paxGid: gid, err := strconv.ParseInt(v, 10, 0) if err != nil { return err } hdr.Gid = int(gid) case paxAtime: t, err := parsePAXTime(v) if err != nil { return err } hdr.AccessTime = t case paxMtime: t, err := parsePAXTime(v) if err != nil { return err } hdr.ModTime = t case paxCtime: t, err := parsePAXTime(v) if err != nil { return err } hdr.ChangeTime = t case paxSize: size, err := strconv.ParseInt(v, 10, 0) if err != nil { return err } hdr.Size = int64(size) default: if strings.HasPrefix(k, paxXattr) { if hdr.Xattrs == nil { hdr.Xattrs = make(map[string]string) } hdr.Xattrs[k[len(paxXattr):]] = v } } } return nil } // parsePAXTime takes a string of the form %d.%d as described in // the PAX specification. func parsePAXTime(t string) (time.Time, error) { buf := []byte(t) pos := bytes.IndexByte(buf, '.') var seconds, nanoseconds int64 var err error if pos == -1 { seconds, err = strconv.ParseInt(t, 10, 0) if err != nil { return time.Time{}, err } } else { seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) if err != nil { return time.Time{}, err } nano_buf := string(buf[pos+1:]) // Pad as needed before converting to a decimal. // For example .030 -> .030000000 -> 30000000 nanoseconds if len(nano_buf) < maxNanoSecondIntSize { // Right pad nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) } else if len(nano_buf) > maxNanoSecondIntSize { // Right truncate nano_buf = nano_buf[:maxNanoSecondIntSize] } nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) if err != nil { return time.Time{}, err } } ts := time.Unix(seconds, nanoseconds) return ts, nil } // parsePAX parses PAX headers. // If an extended header (type 'x') is invalid, ErrHeader is returned func parsePAX(r io.Reader) (map[string]string, error) { buf, err := ioutil.ReadAll(r) if err != nil { return nil, err } // leaving this function for io.Reader makes it more testable if tr, ok := r.(*Reader); ok && tr.RawAccounting { if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. 
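// In format 0.0, each data block is described by its own pair of
// GNU.sparse.offset/GNU.sparse.numbytes records; below they are joined
// into the single comma-separated string that format 0.1 keeps under
// GNU.sparse.map, e.g. "0,5,10,5" for two 5-byte blocks at offsets 0
// and 10.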
var sparseMap bytes.Buffer headers := make(map[string]string) // Each record is constructed as // "%d %s=%s\n", length, keyword, value for len(buf) > 0 { // or the header was empty to start with. var sp int // The size field ends at the first space. sp = bytes.IndexByte(buf, ' ') if sp == -1 { return nil, ErrHeader } // Parse the first token as a decimal integer. n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) if err != nil || n < 5 || int64(len(buf)) < n { return nil, ErrHeader } // Extract everything between the decimal and the n -1 on the // beginning to eat the ' ', -1 on the end to skip the newline. var record []byte record, buf = buf[sp+1:n-1], buf[n:] // The first equals is guaranteed to mark the end of the key. // Everything else is value. eq := bytes.IndexByte(record, '=') if eq == -1 { return nil, ErrHeader } key, value := record[:eq], record[eq+1:] keyStr := string(key) if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. sparseMap.Write(value) sparseMap.Write([]byte{','}) } else { // Normal key. Set the value in the headers map. headers[keyStr] = string(value) } } if sparseMap.Len() != 0 { // Add sparse info to headers, chopping off the extra comma sparseMap.Truncate(sparseMap.Len() - 1) headers[paxGNUSparseMap] = sparseMap.String() } return headers, nil } // cString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. func cString(b []byte) string { n := 0 for n < len(b) && b[n] != 0 { n++ } return string(b[0:n]) } func (tr *Reader) octal(b []byte) int64 { // Check for binary format first. if len(b) > 0 && b[0]&0x80 != 0 { var x int64 for i, c := range b { if i == 0 { c &= 0x7f // ignore signal bit in first byte } x = x<<8 | int64(c) } return x } // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. // So we remove leading and trailing NULs and spaces to // be sure. b = bytes.Trim(b, " \x00") if len(b) == 0 { return 0 } x, err := strconv.ParseUint(cString(b), 8, 64) if err != nil { tr.err = err } return int64(x) } // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. func (tr *Reader) skipUnread() { nr := tr.numBytes() + tr.pad // number of bytes to skip tr.curr, tr.pad = nil, 0 if tr.RawAccounting { _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) return } if sr, ok := tr.r.(io.Seeker); ok { if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { return } } _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) } func (tr *Reader) verifyChecksum(header []byte) bool { if tr.err != nil { return false } given := tr.octal(header[148:156]) unsigned, signed := checksum(header) return given == unsigned || given == signed } func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { return nil } } return nil } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { return nil } } // Two blocks of zero bytes marks the end of the archive. 
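// With RawAccounting enabled, the zero blocks read here are still
// appended to rawBytes, so the archive trailer is preserved for a raw
// reassembly.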
if bytes.Equal(header, zeroBlock[0:blockSize]) { if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { return nil } } return nil } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { return nil } } if bytes.Equal(header, zeroBlock[0:blockSize]) { tr.err = io.EOF } else { tr.err = ErrHeader // zero block and then non-zero block } return nil } if !tr.verifyChecksum(header) { tr.err = ErrHeader return nil } // Unpack hdr := new(Header) s := slicer(header) hdr.Name = cString(s.next(100)) hdr.Mode = tr.octal(s.next(8)) hdr.Uid = int(tr.octal(s.next(8))) hdr.Gid = int(tr.octal(s.next(8))) hdr.Size = tr.octal(s.next(12)) if hdr.Size < 0 { tr.err = ErrHeader return nil } hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] hdr.Linkname = cString(s.next(100)) // The remainder of the header depends on the value of magic. // The original (v7) version of tar had no explicit magic field, // so its magic bytes, like the rest of the block, are NULs. magic := string(s.next(8)) // contains version field as well. var format string switch { case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988) if string(header[508:512]) == "tar\x00" { format = "star" } else { format = "posix" } case magic == "ustar \x00": // old GNU tar format = "gnu" } switch format { case "posix", "gnu", "star": hdr.Uname = cString(s.next(32)) hdr.Gname = cString(s.next(32)) devmajor := s.next(8) devminor := s.next(8) if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { hdr.Devmajor = tr.octal(devmajor) hdr.Devminor = tr.octal(devminor) } var prefix string switch format { case "posix", "gnu": prefix = cString(s.next(155)) case "star": prefix = cString(s.next(131)) hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } if tr.err != nil { tr.err = ErrHeader return nil } // Maximum value of hdr.Size is 64 GB (12 octal digits), // so there's no risk of int64 overflowing. nb := int64(hdr.Size) tr.pad = -nb & (blockSize - 1) // blockSize is a power of two // Set the current file reader. tr.curr = ®FileReader{r: tr.r, nb: nb} // Check for old GNU sparse format entry. if hdr.Typeflag == TypeGNUSparse { // Get the real size of the file. hdr.Size = tr.octal(header[483:495]) // Read the sparse map. sp := tr.readOldGNUSparseMap(header) if tr.err != nil { return nil } // Current file is a GNU sparse file. Update the current file reader. tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} } return hdr } // A sparseEntry holds a single entry in a sparse file's sparse map. // A sparse entry indicates the offset and size in a sparse file of a // block of data. type sparseEntry struct { offset int64 numBytes int64 } // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. 
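// Layout, by offset within the 512-byte block: the main header holds up
// to four entries starting at offset 386, each a 12-digit octal offset
// followed by a 12-digit octal length, with an is-extended flag at
// offset 482; each extension block holds up to 21 such entries with its
// own flag at offset 504. An all-zero entry terminates the map early.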
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 spCap := oldGNUSparseMainHeaderNumEntries if isExtended { spCap += oldGNUSparseExtendedHeaderNumEntries } sp := make([]sparseEntry, 0, spCap) s := slicer(header[oldGNUSparseMainHeaderOffset:]) // Read the four entries from the main tar header for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { offset := tr.octal(s.next(oldGNUSparseOffsetSize)) numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) if tr.err != nil { tr.err = ErrHeader return nil } if offset == 0 && numBytes == 0 { break } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } for isExtended { // There are more entries. Read an extension header and parse its entries. sparseHeader := make([]byte, blockSize) if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil { return nil } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil { return nil } } isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 s = slicer(sparseHeader) for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { offset := tr.octal(s.next(oldGNUSparseOffsetSize)) numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) if tr.err != nil { tr.err = ErrHeader return nil } if offset == 0 && numBytes == 0 { break } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } } return sp } // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. // The sparse map is stored just before the file data and padded out to the nearest block boundary. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { buf := make([]byte, 2*blockSize) sparseHeader := buf[:blockSize] // readDecimal is a helper function to read a decimal integer from the sparse map // while making sure to read from the file in blocks of size blockSize readDecimal := func() (int64, error) { // Look for newline nl := bytes.IndexByte(sparseHeader, '\n') if nl == -1 { if len(sparseHeader) >= blockSize { // This is an error return 0, ErrHeader } oldLen := len(sparseHeader) newLen := oldLen + blockSize if cap(sparseHeader) < newLen { // There's more header, but we need to make room for the next block copy(buf, sparseHeader) sparseHeader = buf[:newLen] } else { // There's more header, and we can just reslice sparseHeader = sparseHeader[:newLen] } // Now that sparseHeader is large enough, read next block if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { return 0, err } // leaving this function for io.Reader makes it more testable if tr, ok := r.(*Reader); ok && tr.RawAccounting { if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil { return 0, err } } // Look for a newline in the new data nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') if nl == -1 { // This is an error return 0, ErrHeader } nl += oldLen // We want the position from the beginning } // Now that we've found a newline, read a number n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) if err != nil { return 0, ErrHeader } // Update sparseHeader to consume this number sparseHeader = sparseHeader[nl+1:] return n, nil } // Read the first block if _, err := io.ReadFull(r, sparseHeader); err != nil { return nil, err } // leaving this function for io.Reader makes it more testable if tr, ok := r.(*Reader); ok && tr.RawAccounting { if _, err := tr.rawBytes.Write(sparseHeader); err != nil { return nil, err } } // The first line contains the number 
of entries numEntries, err := readDecimal() if err != nil { return nil, err } // Read all the entries sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { // Read the offset offset, err := readDecimal() if err != nil { return nil, err } // Read numBytes numBytes, err := readDecimal() if err != nil { return nil, err } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } return sp, nil } // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. // The sparse map is stored in the PAX headers. func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { // Get number of entries numEntriesStr, ok := headers[paxGNUSparseNumBlocks] if !ok { return nil, ErrHeader } numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) if err != nil { return nil, ErrHeader } sparseMap := strings.Split(headers[paxGNUSparseMap], ",") // There should be two numbers in sparseMap for each entry if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } // Loop through the entries in the sparse map sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) if err != nil { return nil, ErrHeader } numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) if err != nil { return nil, ErrHeader } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } return sp, nil } // numBytes returns the number of bytes left to read in the current file's entry // in the tar archive, or 0 if there is no current file. func (tr *Reader) numBytes() int64 { if tr.curr == nil { // No current file, so no bytes return 0 } return tr.curr.numBytes() } // Read reads from the current entry in the tar archive. // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. func (tr *Reader) Read(b []byte) (n int, err error) { if tr.curr == nil { return 0, io.EOF } n, err = tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err } return } func (rfr *regFileReader) Read(b []byte) (n int, err error) { if rfr.nb == 0 { // file consumed return 0, io.EOF } if int64(len(b)) > rfr.nb { b = b[0:rfr.nb] } n, err = rfr.r.Read(b) rfr.nb -= int64(n) if err == io.EOF && rfr.nb > 0 { err = io.ErrUnexpectedEOF } return } // numBytes returns the number of bytes left to read in the file's data in the tar archive. func (rfr *regFileReader) numBytes() int64 { return rfr.nb } // readHole reads a sparse file hole ending at offset toOffset func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { n64 := toOffset - sfr.pos if n64 > int64(len(b)) { n64 = int64(len(b)) } n := int(n64) for i := 0; i < n; i++ { b[i] = 0 } sfr.pos += n64 return n } // Read reads the sparse file data in expanded form. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { if len(sfr.sp) == 0 { // No more data fragments to read from. 
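// Everything from the end of the final data fragment up to tot is an
// implicit trailing hole of zeros.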
if sfr.pos < sfr.tot { // We're in the last hole n = sfr.readHole(b, sfr.tot) return } // Otherwise, we're at the end of the file return 0, io.EOF } if sfr.tot < sfr.sp[0].offset { return 0, io.ErrUnexpectedEOF } if sfr.pos < sfr.sp[0].offset { // We're in a hole n = sfr.readHole(b, sfr.sp[0].offset) return } // We're not in a hole, so we'll read from the next data fragment posInFragment := sfr.pos - sfr.sp[0].offset bytesLeft := sfr.sp[0].numBytes - posInFragment if int64(len(b)) > bytesLeft { b = b[0:bytesLeft] } n, err = sfr.rfr.Read(b) sfr.pos += int64(n) if int64(n) == bytesLeft { // We're done with this fragment sfr.sp = sfr.sp[1:] } if err == io.EOF && sfr.pos < sfr.tot { // We reached the end of the last fragment's data, but there's a final hole err = nil } return } // numBytes returns the number of bytes left to read in the sparse file's // sparse-encoded data in the tar archive. func (sfr *sparseFileReader) numBytes() int64 { return sfr.rfr.nb } tar-split-0.9.10/archive/tar/reader_test.go000066400000000000000000000444521260131227100205630ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar import ( "bytes" "crypto/md5" "fmt" "io" "io/ioutil" "os" "reflect" "strings" "testing" "time" ) type untarTest struct { file string headers []*Header cksums []string } var gnuTarTest = &untarTest{ file: "testdata/gnu.tar", headers: []*Header{ { Name: "small.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 5, ModTime: time.Unix(1244428340, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", }, { Name: "small2.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 11, ModTime: time.Unix(1244436044, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", }, }, cksums: []string{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, } var sparseTarTest = &untarTest{ file: "testdata/sparse-formats.tar", headers: []*Header{ { Name: "sparse-gnu", Mode: 420, Uid: 1000, Gid: 1000, Size: 200, ModTime: time.Unix(1392395740, 0), Typeflag: 0x53, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, }, { Name: "sparse-posix-0.0", Mode: 420, Uid: 1000, Gid: 1000, Size: 200, ModTime: time.Unix(1392342187, 0), Typeflag: 0x30, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, }, { Name: "sparse-posix-0.1", Mode: 420, Uid: 1000, Gid: 1000, Size: 200, ModTime: time.Unix(1392340456, 0), Typeflag: 0x30, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, }, { Name: "sparse-posix-1.0", Mode: 420, Uid: 1000, Gid: 1000, Size: 200, ModTime: time.Unix(1392337404, 0), Typeflag: 0x30, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, }, { Name: "end", Mode: 420, Uid: 1000, Gid: 1000, Size: 4, ModTime: time.Unix(1392398319, 0), Typeflag: 0x30, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, }, }, cksums: []string{ "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "b0061974914468de549a2af8ced10316", }, } var untarTests = []*untarTest{ gnuTarTest, sparseTarTest, { file: "testdata/star.tar", headers: []*Header{ { Name: "small.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 5, ModTime: time.Unix(1244592783, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", AccessTime: time.Unix(1244592783, 0), ChangeTime: time.Unix(1244592783, 0), }, { Name: "small2.txt", Mode: 0640, Uid: 73025, 
Gid: 5000, Size: 11, ModTime: time.Unix(1244592783, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", AccessTime: time.Unix(1244592783, 0), ChangeTime: time.Unix(1244592783, 0), }, }, }, { file: "testdata/v7.tar", headers: []*Header{ { Name: "small.txt", Mode: 0444, Uid: 73025, Gid: 5000, Size: 5, ModTime: time.Unix(1244593104, 0), Typeflag: '\x00', }, { Name: "small2.txt", Mode: 0444, Uid: 73025, Gid: 5000, Size: 11, ModTime: time.Unix(1244593104, 0), Typeflag: '\x00', }, }, }, { file: "testdata/pax.tar", headers: []*Header{ { Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", Mode: 0664, Uid: 1000, Gid: 1000, Uname: "shane", Gname: "shane", Size: 7, ModTime: time.Unix(1350244992, 23960108), ChangeTime: time.Unix(1350244992, 23960108), AccessTime: time.Unix(1350244992, 23960108), Typeflag: TypeReg, }, { Name: "a/b", Mode: 0777, Uid: 1000, Gid: 1000, Uname: "shane", Gname: "shane", Size: 0, ModTime: time.Unix(1350266320, 910238425), ChangeTime: time.Unix(1350266320, 910238425), AccessTime: time.Unix(1350266320, 910238425), Typeflag: TypeSymlink, Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", }, }, }, { file: "testdata/nil-uid.tar", // golang.org/issue/5290 headers: []*Header{ { Name: "P1050238.JPG.log", Mode: 0664, Uid: 0, Gid: 0, Size: 14, ModTime: time.Unix(1365454838, 0), Typeflag: TypeReg, Linkname: "", Uname: "eyefi", Gname: "eyefi", Devmajor: 0, Devminor: 0, }, }, }, { file: "testdata/xattrs.tar", headers: []*Header{ { Name: "small.txt", Mode: 0644, Uid: 1000, Gid: 10, Size: 5, ModTime: time.Unix(1386065770, 448252320), Typeflag: '0', Uname: "alex", Gname: "wheel", AccessTime: time.Unix(1389782991, 419875220), ChangeTime: time.Unix(1389782956, 794414986), Xattrs: map[string]string{ "user.key": "value", "user.key2": "value2", // Interestingly, selinux encodes the terminating null inside the xattr "security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, }, { Name: "small2.txt", Mode: 0644, Uid: 1000, Gid: 10, Size: 11, ModTime: time.Unix(1386065770, 449252304), Typeflag: '0', Uname: "alex", Gname: "wheel", AccessTime: time.Unix(1389782991, 419875220), ChangeTime: time.Unix(1386065770, 449252304), Xattrs: map[string]string{ "security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, }, }, }, } func TestReader(t *testing.T) { testLoop: for i, test := range untarTests { f, err := os.Open(test.file) if err != nil { t.Errorf("test %d: Unexpected error: %v", i, err) continue } defer f.Close() tr := NewReader(f) for j, header := range test.headers { hdr, err := tr.Next() if err != nil || hdr == nil { t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) f.Close() continue testLoop } if !reflect.DeepEqual(*hdr, *header) { t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", i, j, *hdr, *header) } } hdr, err := tr.Next() if err == io.EOF { continue testLoop } if hdr != nil || err != nil { t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) } } } func TestPartialRead(t *testing.T) { f, err := os.Open("testdata/gnu.tar") if err != nil { t.Fatalf("Unexpected error: %v", err) } defer f.Close() tr := NewReader(f) // Read the first four bytes; Next() should skip the last byte. 
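// (small.txt here is 5 bytes; after the 4-byte read below returns
// "Kilt", the next call to Next must transparently discard the
// remaining byte and the block padding before the second header.)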
hdr, err := tr.Next() if err != nil || hdr == nil { t.Fatalf("Didn't get first file: %v", err) } buf := make([]byte, 4) if _, err := io.ReadFull(tr, buf); err != nil { t.Fatalf("Unexpected error: %v", err) } if expected := []byte("Kilt"); !bytes.Equal(buf, expected) { t.Errorf("Contents = %v, want %v", buf, expected) } // Second file hdr, err = tr.Next() if err != nil || hdr == nil { t.Fatalf("Didn't get second file: %v", err) } buf = make([]byte, 6) if _, err := io.ReadFull(tr, buf); err != nil { t.Fatalf("Unexpected error: %v", err) } if expected := []byte("Google"); !bytes.Equal(buf, expected) { t.Errorf("Contents = %v, want %v", buf, expected) } } func TestIncrementalRead(t *testing.T) { test := gnuTarTest f, err := os.Open(test.file) if err != nil { t.Fatalf("Unexpected error: %v", err) } defer f.Close() tr := NewReader(f) headers := test.headers cksums := test.cksums nread := 0 // loop over all files for ; ; nread++ { hdr, err := tr.Next() if hdr == nil || err == io.EOF { break } // check the header if !reflect.DeepEqual(*hdr, *headers[nread]) { t.Errorf("Incorrect header:\nhave %+v\nwant %+v", *hdr, headers[nread]) } // read file contents in little chunks EOF, // checksumming all the way h := md5.New() rdbuf := make([]uint8, 8) for { nr, err := tr.Read(rdbuf) if err == io.EOF { break } if err != nil { t.Errorf("Read: unexpected error %v\n", err) break } h.Write(rdbuf[0:nr]) } // verify checksum have := fmt.Sprintf("%x", h.Sum(nil)) want := cksums[nread] if want != have { t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) } } if nread != len(headers) { t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) } } func TestNonSeekable(t *testing.T) { test := gnuTarTest f, err := os.Open(test.file) if err != nil { t.Fatalf("Unexpected error: %v", err) } defer f.Close() type readerOnly struct { io.Reader } tr := NewReader(readerOnly{f}) nread := 0 for ; ; nread++ { _, err := tr.Next() if err == io.EOF { break } if err != nil { t.Fatalf("Unexpected error: %v", err) } } if nread != len(test.headers) { t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) } } func TestParsePAXHeader(t *testing.T) { paxTests := [][3]string{ {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths {"a", "a=name", "9 a=name\n"}, // Test case involving multiple acceptable length {"mtime", "mtime=1350244992.023960108", "30 mtime=1350244992.023960108\n"}} for _, test := range paxTests { key, expected, raw := test[0], test[1], test[2] reader := bytes.NewReader([]byte(raw)) headers, err := parsePAX(reader) if err != nil { t.Errorf("Couldn't parse correctly formatted headers: %v", err) continue } if strings.EqualFold(headers[key], expected) { t.Errorf("mtime header incorrectly parsed: got %s, wanted %s", headers[key], expected) continue } trailer := make([]byte, 100) n, err := reader.Read(trailer) if err != io.EOF || n != 0 { t.Error("Buffer wasn't consumed") } } badHeaderTests := [][]byte{ []byte("3 somelongkey=\n"), []byte("50 tooshort=\n"), } for _, test := range badHeaderTests { if _, err := parsePAX(bytes.NewReader(test)); err != ErrHeader { t.Fatal("Unexpected success when parsing bad header") } } } func TestParsePAXTime(t *testing.T) { // Some valid PAX time values timestamps := map[string]time.Time{ "1350244992.023960108": time.Unix(1350244992, 23960108), // The common case "1350244992.02396010": time.Unix(1350244992, 23960100), // Lower precision value "1350244992.0239601089": 
time.Unix(1350244992, 23960108), // Higher precision value "1350244992": time.Unix(1350244992, 0), // Low precision value } for input, expected := range timestamps { ts, err := parsePAXTime(input) if err != nil { t.Fatal(err) } if !ts.Equal(expected) { t.Fatalf("Time parsing failure %s %s", ts, expected) } } } func TestMergePAX(t *testing.T) { hdr := new(Header) // Test a string, integer, and time based value. headers := map[string]string{ "path": "a/b/c", "uid": "1000", "mtime": "1350244992.023960108", } err := mergePAX(hdr, headers) if err != nil { t.Fatal(err) } want := &Header{ Name: "a/b/c", Uid: 1000, ModTime: time.Unix(1350244992, 23960108), } if !reflect.DeepEqual(hdr, want) { t.Errorf("incorrect merge: got %+v, want %+v", hdr, want) } } func TestSparseEndToEnd(t *testing.T) { test := sparseTarTest f, err := os.Open(test.file) if err != nil { t.Fatalf("Unexpected error: %v", err) } defer f.Close() tr := NewReader(f) headers := test.headers cksums := test.cksums nread := 0 // loop over all files for ; ; nread++ { hdr, err := tr.Next() if hdr == nil || err == io.EOF { break } // check the header if !reflect.DeepEqual(*hdr, *headers[nread]) { t.Errorf("Incorrect header:\nhave %+v\nwant %+v", *hdr, headers[nread]) } // read and checksum the file data h := md5.New() _, err = io.Copy(h, tr) if err != nil { t.Fatalf("Unexpected error: %v", err) } // verify checksum have := fmt.Sprintf("%x", h.Sum(nil)) want := cksums[nread] if want != have { t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) } } if nread != len(headers) { t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) } } type sparseFileReadTest struct { sparseData []byte sparseMap []sparseEntry realSize int64 expected []byte } var sparseFileReadTests = []sparseFileReadTest{ { sparseData: []byte("abcde"), sparseMap: []sparseEntry{ {offset: 0, numBytes: 2}, {offset: 5, numBytes: 3}, }, realSize: 8, expected: []byte("ab\x00\x00\x00cde"), }, { sparseData: []byte("abcde"), sparseMap: []sparseEntry{ {offset: 0, numBytes: 2}, {offset: 5, numBytes: 3}, }, realSize: 10, expected: []byte("ab\x00\x00\x00cde\x00\x00"), }, { sparseData: []byte("abcde"), sparseMap: []sparseEntry{ {offset: 1, numBytes: 3}, {offset: 6, numBytes: 2}, }, realSize: 8, expected: []byte("\x00abc\x00\x00de"), }, { sparseData: []byte("abcde"), sparseMap: []sparseEntry{ {offset: 1, numBytes: 3}, {offset: 6, numBytes: 2}, }, realSize: 10, expected: []byte("\x00abc\x00\x00de\x00\x00"), }, { sparseData: []byte(""), sparseMap: nil, realSize: 2, expected: []byte("\x00\x00"), }, } func TestSparseFileReader(t *testing.T) { for i, test := range sparseFileReadTests { r := bytes.NewReader(test.sparseData) nb := int64(r.Len()) sfr := &sparseFileReader{ rfr: ®FileReader{r: r, nb: nb}, sp: test.sparseMap, pos: 0, tot: test.realSize, } if sfr.numBytes() != nb { t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) } buf, err := ioutil.ReadAll(sfr) if err != nil { t.Errorf("test %d: Unexpected error: %v", i, err) } if e := test.expected; !bytes.Equal(buf, e) { t.Errorf("test %d: Contents = %v, want %v", i, buf, e) } if sfr.numBytes() != 0 { t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) } } } func TestSparseIncrementalRead(t *testing.T) { sparseMap := []sparseEntry{{10, 2}} sparseData := []byte("Go") expected := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Go\x00\x00\x00\x00\x00\x00\x00\x00" r := bytes.NewReader(sparseData) nb := int64(r.Len()) sfr := 
&sparseFileReader{ rfr: ®FileReader{r: r, nb: nb}, sp: sparseMap, pos: 0, tot: int64(len(expected)), } // We'll read the data 6 bytes at a time, with a hole of size 10 at // the beginning and one of size 8 at the end. var outputBuf bytes.Buffer buf := make([]byte, 6) for { n, err := sfr.Read(buf) if err == io.EOF { break } if err != nil { t.Errorf("Read: unexpected error %v\n", err) } if n > 0 { _, err := outputBuf.Write(buf[:n]) if err != nil { t.Errorf("Write: unexpected error %v\n", err) } } } got := outputBuf.String() if got != expected { t.Errorf("Contents = %v, want %v", got, expected) } } func TestReadGNUSparseMap0x1(t *testing.T) { headers := map[string]string{ paxGNUSparseNumBlocks: "4", paxGNUSparseMap: "0,5,10,5,20,5,30,5", } expected := []sparseEntry{ {offset: 0, numBytes: 5}, {offset: 10, numBytes: 5}, {offset: 20, numBytes: 5}, {offset: 30, numBytes: 5}, } sp, err := readGNUSparseMap0x1(headers) if err != nil { t.Errorf("Unexpected error: %v", err) } if !reflect.DeepEqual(sp, expected) { t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) } } func TestReadGNUSparseMap1x0(t *testing.T) { // This test uses lots of holes so the sparse header takes up more than two blocks numEntries := 100 expected := make([]sparseEntry, 0, numEntries) sparseMap := new(bytes.Buffer) fmt.Fprintf(sparseMap, "%d\n", numEntries) for i := 0; i < numEntries; i++ { offset := int64(2048 * i) numBytes := int64(1024) expected = append(expected, sparseEntry{offset: offset, numBytes: numBytes}) fmt.Fprintf(sparseMap, "%d\n%d\n", offset, numBytes) } // Make the header the smallest multiple of blockSize that fits the sparseMap headerBlocks := (sparseMap.Len() + blockSize - 1) / blockSize bufLen := blockSize * headerBlocks buf := make([]byte, bufLen) copy(buf, sparseMap.Bytes()) // Get an reader to read the sparse map r := bytes.NewReader(buf) // Read the sparse map sp, err := readGNUSparseMap1x0(r) if err != nil { t.Errorf("Unexpected error: %v", err) } if !reflect.DeepEqual(sp, expected) { t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) } } func TestUninitializedRead(t *testing.T) { test := gnuTarTest f, err := os.Open(test.file) if err != nil { t.Fatalf("Unexpected error: %v", err) } defer f.Close() tr := NewReader(f) _, err = tr.Read([]byte{}) if err == nil || err != io.EOF { t.Errorf("Unexpected error: %v, wanted %v", err, io.EOF) } } // Negative header size should not cause panic. // Issues 10959 and 10960. func TestNegativeHdrSize(t *testing.T) { f, err := os.Open("testdata/neg-size.tar") if err != nil { t.Fatal(err) } defer f.Close() r := NewReader(f) _, err = r.Next() if err != ErrHeader { t.Error("want ErrHeader, got", err) } io.Copy(ioutil.Discard, r) } // This used to hang in (*sparseFileReader).readHole due to missing // verification of sparse offsets against file size. func TestIssue10968(t *testing.T) { f, err := os.Open("testdata/issue10968.tar") if err != nil { t.Fatal(err) } defer f.Close() r := NewReader(f) _, err = r.Next() if err != nil { t.Fatal(err) } _, err = io.Copy(ioutil.Discard, r) if err != io.ErrUnexpectedEOF { t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err) } } // Do not panic if there are errors in header blocks after the pax header. 
// Issue 11169 func TestIssue11169(t *testing.T) { f, err := os.Open("testdata/issue11169.tar") if err != nil { t.Fatal(err) } defer f.Close() r := NewReader(f) _, err = r.Next() if err == nil { t.Fatal("Unexpected success") } } tar-split-0.9.10/archive/tar/stat_atim.go000066400000000000000000000006361260131227100202430ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build linux dragonfly openbsd solaris package tar import ( "syscall" "time" ) func statAtime(st *syscall.Stat_t) time.Time { return time.Unix(st.Atim.Unix()) } func statCtime(st *syscall.Stat_t) time.Time { return time.Unix(st.Ctim.Unix()) } tar-split-0.9.10/archive/tar/stat_atimespec.go000066400000000000000000000006361260131227100212630ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build darwin freebsd netbsd package tar import ( "syscall" "time" ) func statAtime(st *syscall.Stat_t) time.Time { return time.Unix(st.Atimespec.Unix()) } func statCtime(st *syscall.Stat_t) time.Time { return time.Unix(st.Ctimespec.Unix()) } tar-split-0.9.10/archive/tar/stat_unix.go000066400000000000000000000013141260131227100202660ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build linux darwin dragonfly freebsd openbsd netbsd solaris package tar import ( "os" "syscall" ) func init() { sysStat = statUnix } func statUnix(fi os.FileInfo, h *Header) error { sys, ok := fi.Sys().(*syscall.Stat_t) if !ok { return nil } h.Uid = int(sys.Uid) h.Gid = int(sys.Gid) // TODO(bradfitz): populate username & group. os/user // doesn't cache LookupId lookups, and lacks group // lookup functions. h.AccessTime = statAtime(sys) h.ChangeTime = statCtime(sys) // TODO(bradfitz): major/minor device numbers? return nil } tar-split-0.9.10/archive/tar/tar_test.go000066400000000000000000000174431260131227100201070ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package tar import ( "bytes" "io/ioutil" "os" "path" "reflect" "strings" "testing" "time" ) func TestFileInfoHeader(t *testing.T) { fi, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } h, err := FileInfoHeader(fi, "") if err != nil { t.Fatalf("FileInfoHeader: %v", err) } if g, e := h.Name, "small.txt"; g != e { t.Errorf("Name = %q; want %q", g, e) } if g, e := h.Mode, int64(fi.Mode().Perm())|c_ISREG; g != e { t.Errorf("Mode = %#o; want %#o", g, e) } if g, e := h.Size, int64(5); g != e { t.Errorf("Size = %v; want %v", g, e) } if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { t.Errorf("ModTime = %v; want %v", g, e) } // FileInfoHeader should error when passing nil FileInfo if _, err := FileInfoHeader(nil, ""); err == nil { t.Fatalf("Expected error when passing nil to FileInfoHeader") } } func TestFileInfoHeaderDir(t *testing.T) { fi, err := os.Stat("testdata") if err != nil { t.Fatal(err) } h, err := FileInfoHeader(fi, "") if err != nil { t.Fatalf("FileInfoHeader: %v", err) } if g, e := h.Name, "testdata/"; g != e { t.Errorf("Name = %q; want %q", g, e) } // Ignoring c_ISGID for golang.org/issue/4867 if g, e := h.Mode&^c_ISGID, int64(fi.Mode().Perm())|c_ISDIR; g != e { t.Errorf("Mode = %#o; want %#o", g, e) } if g, e := h.Size, int64(0); g != e { t.Errorf("Size = %v; want %v", g, e) } if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { t.Errorf("ModTime = %v; want %v", g, e) } } func TestFileInfoHeaderSymlink(t *testing.T) { h, err := FileInfoHeader(symlink{}, "some-target") if err != nil { t.Fatal(err) } if g, e := h.Name, "some-symlink"; g != e { t.Errorf("Name = %q; want %q", g, e) } if g, e := h.Linkname, "some-target"; g != e { t.Errorf("Linkname = %q; want %q", g, e) } } type symlink struct{} func (symlink) Name() string { return "some-symlink" } func (symlink) Size() int64 { return 0 } func (symlink) Mode() os.FileMode { return os.ModeSymlink } func (symlink) ModTime() time.Time { return time.Time{} } func (symlink) IsDir() bool { return false } func (symlink) Sys() interface{} { return nil } func TestRoundTrip(t *testing.T) { data := []byte("some file contents") var b bytes.Buffer tw := NewWriter(&b) hdr := &Header{ Name: "file.txt", Uid: 1 << 21, // too big for 8 octal digits Size: int64(len(data)), ModTime: time.Now(), } // tar only supports second precision. hdr.ModTime = hdr.ModTime.Add(-time.Duration(hdr.ModTime.Nanosecond()) * time.Nanosecond) if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("tw.WriteHeader: %v", err) } if _, err := tw.Write(data); err != nil { t.Fatalf("tw.Write: %v", err) } if err := tw.Close(); err != nil { t.Fatalf("tw.Close: %v", err) } // Read it back. tr := NewReader(&b) rHdr, err := tr.Next() if err != nil { t.Fatalf("tr.Next: %v", err) } if !reflect.DeepEqual(rHdr, hdr) { t.Errorf("Header mismatch.\n got %+v\nwant %+v", rHdr, hdr) } rData, err := ioutil.ReadAll(tr) if err != nil { t.Fatalf("Read: %v", err) } if !bytes.Equal(rData, data) { t.Errorf("Data mismatch.\n got %q\nwant %q", rData, data) } } type headerRoundTripTest struct { h *Header fm os.FileMode } func TestHeaderRoundTrip(t *testing.T) { golden := []headerRoundTripTest{ // regular file. { h: &Header{ Name: "test.txt", Mode: 0644 | c_ISREG, Size: 12, ModTime: time.Unix(1360600916, 0), Typeflag: TypeReg, }, fm: 0644, }, // symbolic link. { h: &Header{ Name: "link.txt", Mode: 0777 | c_ISLNK, Size: 0, ModTime: time.Unix(1360600852, 0), Typeflag: TypeSymlink, }, fm: 0777 | os.ModeSymlink, }, // character device node. 
{ h: &Header{ Name: "dev/null", Mode: 0666 | c_ISCHR, Size: 0, ModTime: time.Unix(1360578951, 0), Typeflag: TypeChar, }, fm: 0666 | os.ModeDevice | os.ModeCharDevice, }, // block device node. { h: &Header{ Name: "dev/sda", Mode: 0660 | c_ISBLK, Size: 0, ModTime: time.Unix(1360578954, 0), Typeflag: TypeBlock, }, fm: 0660 | os.ModeDevice, }, // directory. { h: &Header{ Name: "dir/", Mode: 0755 | c_ISDIR, Size: 0, ModTime: time.Unix(1360601116, 0), Typeflag: TypeDir, }, fm: 0755 | os.ModeDir, }, // fifo node. { h: &Header{ Name: "dev/initctl", Mode: 0600 | c_ISFIFO, Size: 0, ModTime: time.Unix(1360578949, 0), Typeflag: TypeFifo, }, fm: 0600 | os.ModeNamedPipe, }, // setuid. { h: &Header{ Name: "bin/su", Mode: 0755 | c_ISREG | c_ISUID, Size: 23232, ModTime: time.Unix(1355405093, 0), Typeflag: TypeReg, }, fm: 0755 | os.ModeSetuid, }, // setguid. { h: &Header{ Name: "group.txt", Mode: 0750 | c_ISREG | c_ISGID, Size: 0, ModTime: time.Unix(1360602346, 0), Typeflag: TypeReg, }, fm: 0750 | os.ModeSetgid, }, // sticky. { h: &Header{ Name: "sticky.txt", Mode: 0600 | c_ISREG | c_ISVTX, Size: 7, ModTime: time.Unix(1360602540, 0), Typeflag: TypeReg, }, fm: 0600 | os.ModeSticky, }, // hard link. { h: &Header{ Name: "hard.txt", Mode: 0644 | c_ISREG, Size: 0, Linkname: "file.txt", ModTime: time.Unix(1360600916, 0), Typeflag: TypeLink, }, fm: 0644, }, // More information. { h: &Header{ Name: "info.txt", Mode: 0600 | c_ISREG, Size: 0, Uid: 1000, Gid: 1000, ModTime: time.Unix(1360602540, 0), Uname: "slartibartfast", Gname: "users", Typeflag: TypeReg, }, fm: 0600, }, } for i, g := range golden { fi := g.h.FileInfo() h2, err := FileInfoHeader(fi, "") if err != nil { t.Error(err) continue } if strings.Contains(fi.Name(), "/") { t.Errorf("FileInfo of %q contains slash: %q", g.h.Name, fi.Name()) } name := path.Base(g.h.Name) if fi.IsDir() { name += "/" } if got, want := h2.Name, name; got != want { t.Errorf("i=%d: Name: got %v, want %v", i, got, want) } if got, want := h2.Size, g.h.Size; got != want { t.Errorf("i=%d: Size: got %v, want %v", i, got, want) } if got, want := h2.Uid, g.h.Uid; got != want { t.Errorf("i=%d: Uid: got %d, want %d", i, got, want) } if got, want := h2.Gid, g.h.Gid; got != want { t.Errorf("i=%d: Gid: got %d, want %d", i, got, want) } if got, want := h2.Uname, g.h.Uname; got != want { t.Errorf("i=%d: Uname: got %q, want %q", i, got, want) } if got, want := h2.Gname, g.h.Gname; got != want { t.Errorf("i=%d: Gname: got %q, want %q", i, got, want) } if got, want := h2.Linkname, g.h.Linkname; got != want { t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want) } if got, want := h2.Typeflag, g.h.Typeflag; got != want { t.Logf("%#v %#v", g.h, fi.Sys()) t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want) } if got, want := h2.Mode, g.h.Mode; got != want { t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) } if got, want := fi.Mode(), g.fm; got != want { t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) } if got, want := h2.AccessTime, g.h.AccessTime; got != want { t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want) } if got, want := h2.ChangeTime, g.h.ChangeTime; got != want { t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want) } if got, want := h2.ModTime, g.h.ModTime; got != want { t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) } if sysh, ok := fi.Sys().(*Header); !ok || sysh != g.h { t.Errorf("i=%d: Sys didn't return original *Header", i) } } } 
tar-split-0.9.10/archive/tar/testdata/000077500000000000000000000000001260131227100175335ustar00rootroot00000000000000tar-split-0.9.10/archive/tar/testdata/gnu.tar000066400000000000000000000060001260131227100210300ustar00rootroot00000000000000small.txt0000640021650100116100000000000511213074064012105 0ustar dsymondsengKiltssmall2.txt0000640021650100116100000000001311213113114012154 0ustar dsymondsengGoogle.com tar-split-0.9.10/archive/tar/testdata/hardlink.tar000066400000000000000000000050001260131227100220320ustar00rootroot00000000000000file.txt0000644000175000001440000000001712475625017013267 0ustar00vbattsusers00000000000000Slartibartfast hard.txt0000644000175000001440000000000012475625017014735 1file.txtustar00vbattsusers00000000000000tar-split-0.9.10/archive/tar/testdata/issue10968.tar000066400000000000000000000010001260131227100217720ustar00rootroot0000000000000000-821950296ts|s00qwf000011s0100ts|ss0s|ssxSs10100ts|ss0s|ss0qS0t000q0001011s1000t00qfj.S100txS00t000qw010100ts|ssxS00t000qwf000011s10100ts|ss0s|sxSs1000ts|ss00s|sssx100t000q0001s100100t f04011100txS00t000qwf1411tar-split-0.9.10/archive/tar/testdata/issue11169.tar000066400000000000000000000011321260131227100217720ustar00rootroot00000000000000./PaxHeaders.14463/aaa00006440000000000000000000000132125311453710114200xustar0030 00000=00000000000000000000030 00000=00000000000000000000030 00000=000000000000000000000tar-split-0.9.10/archive/tar/testdata/neg-size.tar000066400000000000000000000010001260131227100217530ustar00rootroot0000000000000000-821950296ts|s00qwf000011s10100ts|ss0s|ssxSs10100ts|ss0s|ssqS0t000q0001011s1010t00qf1100txS00t000qw010100ts|ssxS00t000qwf000011s10100ts|ss0s|ssxSs10100ts|ss00s|ss0xS00t000q0001s10100t f04011100txS00t000qwf000011sssxSs10100ts|ss311033624846128380s|ssxS00t000q00001011s10100t00qf04s|ss0s|ssxS00t000q00001011s10100t00x0f0tar-split-0.9.10/archive/tar/testdata/nil-uid.tar000066400000000000000000000020001260131227100215740ustar00rootroot00000000000000P1050238.JPG.log00006640000000001612130627766012777 0ustar eyefieyefi121304042001213062776644,44,POWERONtar-split-0.9.10/archive/tar/testdata/pax.tar000066400000000000000000000240001260131227100210270ustar00rootroot00000000000000a/PaxHeaders.6887/12345678910111213141516171819202122232425262728293031323334353637383940414243444540000644000175000017500000000044612036615200022461 xustar0000000000000000204 path=a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 30 mtime=1350244992.023960108 30 atime=1350244992.023960108 30 ctime=1350244992.023960108 a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525350000664000175000017500000000000712036615200023454 0ustar00shaneshane00000000000000shaner a/PaxHeaders.6887/b0000644000175000017500000000045012036666720012440 xustar0000000000000000206 linkpath=123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 30 mtime=1350266320.910238425 30 atime=1350266320.910238425 30 ctime=1350266320.910238425 a/b0000777000175000017500000000000012036666720024004 
21234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545ustar00shaneshane00000000000000tar-split-0.9.10/archive/tar/testdata/small.txt000066400000000000000000000000051260131227100213770ustar00rootroot00000000000000Kiltstar-split-0.9.10/archive/tar/testdata/small2.txt000066400000000000000000000000131260131227100214600ustar00rootroot00000000000000Google.com tar-split-0.9.10/archive/tar/testdata/sparse-formats.tar000066400000000000000000000430001260131227100232060ustar00rootroot00000000000000sparse-gnu0000644000175000017500000000013712277442734023356 Sustar daviddavid0000000000100000000001000000000030000000000100000000005000000000010000000000700000000001000000003100000000001100000000001000000000130000000000100000000015000000000010000000001700000000001000000000210000000000100000000023000000000010000000002500000000001000000000270000000000100000000031000000000010000000003300000000001000000000350000000000100000000037000000000010000000004100000000001000000000430000000000100000000045000000000010000000004700000000001000000000510000000000100000000053000000000010000000005500000000001000000000570000000000100000000061000000000010000000006300000000001000000000650000000000100000000067000000000010000000007100000000001000000000730000000000100000000075000000000010000000007700000000001000000001010000000000100000000103000000000010000000010500000000001000000001070000000000100000000111000000000010000000011300000000001000000001150000000000100000000117000000000010000000012100000000001000000001230000000000100000000125000000000010000000012700000000001000000001310000000000100000000133000000000010000000013500000000001000000001370000000000100000000141000000000010000000014300000000001000000001450000000000100000000147000000000010000000015100000000001000000001530000000000100000000155000000000010000000015700000000001000000001610000000000100000000163000000000010000000016500000000001000000001670000000000100000000171000000000010000000017300000000001000000001750000000000100000000177000000000010000000020100000000001000000002030000000000100000000205000000000010000000020700000000001000000002110000000000100000000213000000000010000000021500000000001000000002170000000000100000000221000000000010000000022300000000001000000002250000000000100000000227000000000010000000023100000000001000000002330000000000100000000235000000000010000000023700000000001000000002410000000000100000000243000000000010000000024500000000001000000002470000000000100000000251000000000010000000025300000000001000000002550000000000100000000257000000000010000000026100000000001000000002630000000000100000000265000000000010000000026700000000001000000002710000000000100000000273000000000010000000027500000000001GoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGo!./PaxHeaders.1234/sparse-posix-0.00000644000175000017500000001121112277274257015625 xustar00daviddavid23 GNU.sparse.size=200 27 GNU.sparse.numblocks=95 23 GNU.sparse.offset=1 25 GNU.sparse.numbytes=1 23 GNU.sparse.offset=3 25 GNU.sparse.numbytes=1 23 GNU.sparse.offset=5 25 GNU.sparse.numbytes=1 23 GNU.sparse.offset=7 25 GNU.sparse.numbytes=1 23 GNU.sparse.offset=9 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=11 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=13 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=15 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=17 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=19 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=21 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=23 25 GNU.sparse.numbytes=1 24 
GNU.sparse.offset=25 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=27 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=29 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=31 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=33 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=35 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=37 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=39 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=41 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=43 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=45 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=47 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=49 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=51 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=53 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=55 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=57 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=59 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=61 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=63 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=65 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=67 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=69 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=71 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=73 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=75 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=77 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=79 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=81 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=83 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=85 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=87 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=89 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=91 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=93 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=95 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=97 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=99 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=101 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=103 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=105 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=107 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=109 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=111 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=113 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=115 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=117 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=119 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=121 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=123 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=125 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=127 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=129 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=131 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=133 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=135 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=137 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=139 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=141 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=143 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=145 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=147 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=149 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=151 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=153 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=155 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=157 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=159 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=161 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=163 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=165 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=167 25 
GNU.sparse.numbytes=1 25 GNU.sparse.offset=169 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=171 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=173 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=175 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=177 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=179 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=181 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=183 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=185 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=187 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=189 25 GNU.sparse.numbytes=1 sparse-posix-0.00000644000175000017500000000013712277272253012743 0ustar00daviddavidGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGo!./PaxHeaders.1234/sparse-posix-0.10000644000175000017500000000115412277271266015630 xustar00daviddavid23 GNU.sparse.size=200 27 GNU.sparse.numblocks=95 36 GNU.sparse.name=sparse-posix-0.1 534 GNU.sparse.map=1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1 ./GNUSparseFile.1234/sparse-posix-0.10000644000175000017500000000013712277266750016105 0ustar00daviddavidGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGo!./PaxHeaders.1234/sparse-posix-1.00000644000175000017500000000015312277261432015620 xustar00daviddavid22 GNU.sparse.major=1 22 GNU.sparse.minor=0 36 GNU.sparse.name=sparse-posix-1.0 27 GNU.sparse.realsize=200 ./GNUSparseFile.1234/sparse-posix-1.00000644000175000017500000000213712277260774016107 0ustar00daviddavid95 1 1 3 1 5 1 7 1 9 1 11 1 13 1 15 1 17 1 19 1 21 1 23 1 25 1 27 1 29 1 31 1 33 1 35 1 37 1 39 1 41 1 43 1 45 1 47 1 49 1 51 1 53 1 55 1 57 1 59 1 61 1 63 1 65 1 67 1 69 1 71 1 73 1 75 1 77 1 79 1 81 1 83 1 85 1 87 1 89 1 91 1 93 1 95 1 97 1 99 1 101 1 103 1 105 1 107 1 109 1 111 1 113 1 115 1 117 1 119 1 121 1 123 1 125 1 127 1 129 1 131 1 133 1 135 1 137 1 139 1 141 1 143 1 145 1 147 1 149 1 151 1 153 1 155 1 157 1 159 1 161 1 163 1 165 1 167 1 169 1 171 1 173 1 175 1 177 1 179 1 181 1 183 1 185 1 187 1 189 1 GoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGo!end0000644000175000017500000000000412277447757010527 0ustar daviddavidend tar-split-0.9.10/archive/tar/testdata/star.tar000066400000000000000000000060001260131227100212100ustar00rootroot00000000000000small.txt0000640 0216501 0011610 00000000005 11213575217 0016730 0ustar00dsymondseng0000000 0000000 11213575217 11213575217 tarKiltssmall2.txt0000640 0216501 0011610 00000000013 11213575217 0017011 0ustar00dsymondseng0000000 0000000 11213575217 11213575217 tarGoogle.com tar-split-0.9.10/archive/tar/testdata/ustar.tar000066400000000000000000000040001260131227100213730ustar00rootroot00000000000000file.txt0000644000076500000240000000000612104402656045134 0ustar00shanestaff00000000000000longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longnamehello 
tar-split-0.9.10/archive/tar/testdata/v7.tar000066400000000000000000000070001260131227100205740ustar00rootroot00000000000000small.txt 444 216501 11610 5 11213575720 6062 Kilts=$AlD"鯗!"G B 鯗B HAl L㏗ ㏑41uᏔhv㏔H  H 4Hhh hQnn㏔ zB+L/Zthread_helpers@``small2.txt 444 216501 11610 13 11213575720 6163 Google.com =$Al=?lD"鯗!"G B 鯗B HAl D"C!"Q L CL ?l | C41uᏥ ㏑㏔H 41uᏔv㏊Xĥ  hx 4Xĥh㏘ hQnn㏊ /)+LH/m(mtar-split-0.9.10/archive/tar/testdata/writer-big-long.tar000066400000000000000000000100001260131227100232420ustar00rootroot0000000000000016gig.txt00006440001750000175012332770507046027 0ustar guillaumeguillaume00000000000000longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longnametar-split-0.9.10/archive/tar/testdata/writer-big.tar000066400000000000000000000100001260131227100223050ustar00rootroot00000000000000tmp/16gig.txt00006400216501001161011262231050013123 0ustar dsymondseng00000000000000tar-split-0.9.10/archive/tar/testdata/writer.tar000066400000000000000000000070001260131227100215540ustar00rootroot00000000000000small.txt0000640021650100116100000000000511223032352013400 0ustar00dsymondseng00000000000000Kiltssmall2.txt0000640021650100116100000000001311216101324013457 0ustar00dsymondseng00000000000000Google.com link.txt0000777000175000017500000000000011626640112015665 2small.txtustar00stringsstrings00000000000000tar-split-0.9.10/archive/tar/testdata/xattrs.tar000066400000000000000000000120001260131227100215610ustar00rootroot00000000000000./PaxHeaders.29205/small.txt0000644000000000000000000000033512247327552014100 xustar000000000000000029 mtime=1386065770.44825232 29 atime=1389782991.41987522 30 ctime=1389782956.794414986 31 SCHILY.xattr.user.key=value 33 SCHILY.xattr.user.key2=value2 69 SCHILY.xattr.security.selinux=unconfined_u:object_r:default_t:s0 small.txt0000644000175000000120000000000512247327552013040 0ustar00alexwheel00000000000000Kilts./PaxHeaders.29205/small2.txt0000644000000000000000000000023612247327552014162 xustar000000000000000030 mtime=1386065770.449252304 29 atime=1389782991.41987522 30 ctime=1386065770.449252304 69 SCHILY.xattr.security.selinux=unconfined_u:object_r:default_t:s0 small2.txt0000644000175000000120000000001312247327552013121 0ustar00alexwheel00000000000000Google.com tar-split-0.9.10/archive/tar/writer.go000066400000000000000000000271361260131227100175760ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar // TODO(dsymonds): // - catch more errors (no first header, etc.) import ( "bytes" "errors" "fmt" "io" "os" "path" "strconv" "strings" "time" ) var ( ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") errNameTooLong = errors.New("archive/tar: name too long") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") ) // A Writer provides sequential writing of a tar archive in POSIX.1 format. // A tar archive consists of a sequence of files. // Call WriteHeader to begin a new file, and then call Write to supply that file's data, // writing at most hdr.Size bytes in total. 
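// A minimal usage sketch (illustrative only; error handling elided):
//
//	tw := NewWriter(f)          // f is any io.Writer, e.g. an *os.File
//	hdr.Size = int64(len(data)) // Write accepts at most hdr.Size bytes
//	tw.WriteHeader(hdr)
//	tw.Write(data)
//	tw.Close()                  // flushes padding and the two-block trailer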
type Writer struct { w io.Writer err error nb int64 // number of unwritten bytes for current file entry pad int64 // amount of padding to write after current file entry closed bool usedBinary bool // whether the binary numeric field extension was used preferPax bool // use pax header instead of binary numeric header hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header } // NewWriter creates a new Writer writing to w. func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } // Flush finishes writing the current file (optional). func (tw *Writer) Flush() error { if tw.nb > 0 { tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) return tw.err } n := tw.nb + tw.pad for n > 0 && tw.err == nil { nr := n if nr > blockSize { nr = blockSize } var nw int nw, tw.err = tw.w.Write(zeroBlock[0:nr]) n -= int64(nw) } tw.nb = 0 tw.pad = 0 return tw.err } // Write s into b, terminating it with a NUL if there is room. // If the value is too long for the field and allowPax is true add a paxheader record instead func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) { needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s) if needsPaxHeader { paxHeaders[paxKeyword] = s return } if len(s) > len(b) { if tw.err == nil { tw.err = ErrFieldTooLong } return } ascii := toASCII(s) copy(b, ascii) if len(ascii) < len(b) { b[len(ascii)] = 0 } } // Encode x as an octal ASCII string and write it into b with leading zeros. func (tw *Writer) octal(b []byte, x int64) { s := strconv.FormatInt(x, 8) // leading zeros, but leave room for a NUL. for len(s)+1 < len(b) { s = "0" + s } tw.cString(b, s, false, paxNone, nil) } // Write x into b, either as octal or as binary (GNUtar/star extension). // If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { // Try octal first. s := strconv.FormatInt(x, 8) if len(s) < len(b) { tw.octal(b, x) return } // If it is too long for octal, and pax is preferred, use a pax header if allowPax && tw.preferPax { tw.octal(b, 0) s := strconv.FormatInt(x, 10) paxHeaders[paxKeyword] = s return } // Too big: use binary (big-endian). tw.usedBinary = true for i := len(b) - 1; x > 0 && i >= 0; i-- { b[i] = byte(x) x >>= 8 } b[0] |= 0x80 // highest bit indicates binary format } var ( minTime = time.Unix(0, 0) // There is room for 11 octal digits (33 bits) of mtime. maxTime = minTime.Add((1<<33 - 1) * time.Second) ) // WriteHeader writes hdr and prepares to accept the file's contents. // WriteHeader calls Flush if it is not the first header. // Calling after a Close will return ErrWriteAfterClose. func (tw *Writer) WriteHeader(hdr *Header) error { return tw.writeHeader(hdr, true) } // WriteHeader writes hdr and prepares to accept the file's contents. // WriteHeader calls Flush if it is not the first header. // Calling after a Close will return ErrWriteAfterClose. // As this method is called internally by writePax header to allow it to // suppress writing the pax header. 
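// writeHeader is also invoked recursively by writePAXHeader with
// allowPax=false; that inner call emits the pax extended-header entry itself
// through the plain ustar path (and its own scratch buffer, see below), so
// the recursion is at most one level deep.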
func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { if tw.closed { return ErrWriteAfterClose } if tw.err == nil { tw.Flush() } if tw.err != nil { return tw.err } // a map to hold pax header records, if any are needed paxHeaders := make(map[string]string) // TODO(shanemhansen): we might want to use PAX headers for // subsecond time resolution, but for now let's just capture // too long fields or non ascii characters var header []byte // We need to select which scratch buffer to use carefully, // since this method is called recursively to write PAX headers. // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is // already being used by the non-recursive call, so we must use paxHdrBuff. header = tw.hdrBuff[:] if !allowPax { header = tw.paxHdrBuff[:] } copy(header, zeroBlock) s := slicer(header) // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax pathHeaderBytes := s.next(fileNameSize) tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) // Handle out of range ModTime carefully. var modTime int64 if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { modTime = hdr.ModTime.Unix() } tw.octal(s.next(8), hdr.Mode) // 100:108 tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity s.next(8) // chksum (148:156) s.next(1)[0] = hdr.Typeflag // 156:157 tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) copy(s.next(8), []byte("ustar\x0000")) // 257:265 tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax prefixHeaderBytes := s.next(155) tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix // Use the GNU magic instead of POSIX magic if we used any GNU extensions. if tw.usedBinary { copy(header[257:265], []byte("ustar \x00")) } _, paxPathUsed := paxHeaders[paxPath] // try to use a ustar header when only the name is too long if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { suffix := hdr.Name prefix := "" if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { var err error prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) if err == nil { // ok we can use a ustar long name instead of pax, now correct the fields // remove the path field from the pax header. this will suppress the pax header delete(paxHeaders, paxPath) // update the path fields tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) // Use the ustar magic if we used ustar long names. if len(prefix) > 0 && !tw.usedBinary { copy(header[257:265], []byte("ustar\x00")) } } } } // The chksum field is terminated by a NUL and a space. // This is different from the other octal fields. 
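// checksum (defined alongside the reader, not shown here) sums every byte of
// the header, counting the eight bytes of the chksum field itself as ASCII
// spaces; only the unsigned variant of the sum is recorded below.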
chksum, _ := checksum(header) tw.octal(header[148:155], chksum) header[155] = ' ' if tw.err != nil { // problem with header; probably integer too big for a field. return tw.err } if allowPax { for k, v := range hdr.Xattrs { paxHeaders[paxXattr+k] = v } } if len(paxHeaders) > 0 { if !allowPax { return errInvalidHeader } if err := tw.writePAXHeader(hdr, paxHeaders); err != nil { return err } } tw.nb = int64(hdr.Size) tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize _, tw.err = tw.w.Write(header) return tw.err } // writeUSTARLongName splits a USTAR long name hdr.Name. // name must be < 256 characters. errNameTooLong is returned // if hdr.Name can't be split. The splitting heuristic // is compatible with gnu tar. func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { length := len(name) if length > fileNamePrefixSize+1 { length = fileNamePrefixSize + 1 } else if name[length-1] == '/' { length-- } i := strings.LastIndex(name[:length], "/") // nlen contains the resulting length in the name field. // plen contains the resulting length in the prefix field. nlen := len(name) - i - 1 plen := i if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { err = errNameTooLong return } prefix, suffix = name[:i], name[i+1:] return } // writePaxHeader writes an extended pax header to the // archive. func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error { // Prepare extended header ext := new(Header) ext.Typeflag = TypeXHeader // Setting ModTime is required for reader parsing to // succeed, and seems harmless enough. ext.ModTime = hdr.ModTime // The spec asks that we namespace our pseudo files // with the current pid. pid := os.Getpid() dir, file := path.Split(hdr.Name) fullName := path.Join(dir, fmt.Sprintf("PaxHeaders.%d", pid), file) ascii := toASCII(fullName) if len(ascii) > 100 { ascii = ascii[:100] } ext.Name = ascii // Construct the body var buf bytes.Buffer for k, v := range paxHeaders { fmt.Fprint(&buf, paxHeader(k+"="+v)) } ext.Size = int64(len(buf.Bytes())) if err := tw.writeHeader(ext, false); err != nil { return err } if _, err := tw.Write(buf.Bytes()); err != nil { return err } if err := tw.Flush(); err != nil { return err } return nil } // paxHeader formats a single pax record, prefixing it with the appropriate length func paxHeader(msg string) string { const padding = 2 // Extra padding for space and newline size := len(msg) + padding size += len(strconv.Itoa(size)) record := fmt.Sprintf("%d %s\n", size, msg) if len(record) != size { // Final adjustment if adding size increased // the number of digits in size size = len(record) record = fmt.Sprintf("%d %s\n", size, msg) } return record } // Write writes to the current entry in the tar archive. // Write returns the error ErrWriteTooLong if more than // hdr.Size bytes are written after WriteHeader. func (tw *Writer) Write(b []byte) (n int, err error) { if tw.closed { err = ErrWriteAfterClose return } overwrite := false if int64(len(b)) > tw.nb { b = b[0:tw.nb] overwrite = true } n, err = tw.w.Write(b) tw.nb -= int64(n) if err == nil && overwrite { err = ErrWriteTooLong return } tw.err = err return } // Close closes the tar archive, flushing any unwritten // data to the underlying writer. 
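// Close does not close the underlying writer.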
func (tw *Writer) Close() error { if tw.err != nil || tw.closed { return tw.err } tw.Flush() tw.closed = true if tw.err != nil { return tw.err } // trailer: two zero blocks for i := 0; i < 2; i++ { _, tw.err = tw.w.Write(zeroBlock) if tw.err != nil { break } } return tw.err } tar-split-0.9.10/archive/tar/writer_test.go000066400000000000000000000325361260131227100206350ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar import ( "bytes" "fmt" "io" "io/ioutil" "os" "reflect" "strings" "testing" "testing/iotest" "time" ) type writerTestEntry struct { header *Header contents string } type writerTest struct { file string // filename of expected output entries []*writerTestEntry } var writerTests = []*writerTest{ // The writer test file was produced with this command: // tar (GNU tar) 1.26 // ln -s small.txt link.txt // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt { file: "testdata/writer.tar", entries: []*writerTestEntry{ { header: &Header{ Name: "small.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 5, ModTime: time.Unix(1246508266, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", }, contents: "Kilts", }, { header: &Header{ Name: "small2.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 11, ModTime: time.Unix(1245217492, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", }, contents: "Google.com\n", }, { header: &Header{ Name: "link.txt", Mode: 0777, Uid: 1000, Gid: 1000, Size: 0, ModTime: time.Unix(1314603082, 0), Typeflag: '2', Linkname: "small.txt", Uname: "strings", Gname: "strings", }, // no contents }, }, }, // The truncated test file was produced using these commands: // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar { file: "testdata/writer-big.tar", entries: []*writerTestEntry{ { header: &Header{ Name: "tmp/16gig.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 16 << 30, ModTime: time.Unix(1254699560, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", }, // fake contents contents: strings.Repeat("\x00", 4<<10), }, }, }, // The truncated test file was produced using these commands: // dd if=/dev/zero bs=1048576 count=16384 > (longname/)*15 /16gig.txt // tar -b 1 -c -f- (longname/)*15 /16gig.txt | dd bs=512 count=8 > writer-big-long.tar { file: "testdata/writer-big-long.tar", entries: []*writerTestEntry{ { header: &Header{ Name: strings.Repeat("longname/", 15) + "16gig.txt", Mode: 0644, Uid: 1000, Gid: 1000, Size: 16 << 30, ModTime: time.Unix(1399583047, 0), Typeflag: '0', Uname: "guillaume", Gname: "guillaume", }, // fake contents contents: strings.Repeat("\x00", 4<<10), }, }, }, // This file was produced using gnu tar 1.17 // gnutar -b 4 --format=ustar (longname/)*15 + file.txt { file: "testdata/ustar.tar", entries: []*writerTestEntry{ { header: &Header{ Name: strings.Repeat("longname/", 15) + "file.txt", Mode: 0644, Uid: 0765, Gid: 024, Size: 06, ModTime: time.Unix(1360135598, 0), Typeflag: '0', Uname: "shane", Gname: "staff", }, contents: "hello\n", }, }, }, // This file was produced using gnu tar 1.26 // echo "Slartibartfast" > file.txt // ln file.txt hard.txt // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt { file: "testdata/hardlink.tar", entries: []*writerTestEntry{ { header: &Header{ Name: "file.txt", Mode: 0644, Uid: 1000, Gid: 100, Size: 15, ModTime: time.Unix(1425484303, 0), Typeflag: '0', Uname: "vbatts", 
Gname: "users", }, contents: "Slartibartfast\n", }, { header: &Header{ Name: "hard.txt", Mode: 0644, Uid: 1000, Gid: 100, Size: 0, ModTime: time.Unix(1425484303, 0), Typeflag: '1', Linkname: "file.txt", Uname: "vbatts", Gname: "users", }, // no contents }, }, }, } // Render byte array in a two-character hexadecimal string, spaced for easy visual inspection. func bytestr(offset int, b []byte) string { const rowLen = 32 s := fmt.Sprintf("%04x ", offset) for _, ch := range b { switch { case '0' <= ch && ch <= '9', 'A' <= ch && ch <= 'Z', 'a' <= ch && ch <= 'z': s += fmt.Sprintf(" %c", ch) default: s += fmt.Sprintf(" %02x", ch) } } return s } // Render a pseudo-diff between two blocks of bytes. func bytediff(a []byte, b []byte) string { const rowLen = 32 s := fmt.Sprintf("(%d bytes vs. %d bytes)\n", len(a), len(b)) for offset := 0; len(a)+len(b) > 0; offset += rowLen { na, nb := rowLen, rowLen if na > len(a) { na = len(a) } if nb > len(b) { nb = len(b) } sa := bytestr(offset, a[0:na]) sb := bytestr(offset, b[0:nb]) if sa != sb { s += fmt.Sprintf("-%v\n+%v\n", sa, sb) } a = a[na:] b = b[nb:] } return s } func TestWriter(t *testing.T) { testLoop: for i, test := range writerTests { expected, err := ioutil.ReadFile(test.file) if err != nil { t.Errorf("test %d: Unexpected error: %v", i, err) continue } buf := new(bytes.Buffer) tw := NewWriter(iotest.TruncateWriter(buf, 4<<10)) // only catch the first 4 KB big := false for j, entry := range test.entries { big = big || entry.header.Size > 1<<10 if err := tw.WriteHeader(entry.header); err != nil { t.Errorf("test %d, entry %d: Failed writing header: %v", i, j, err) continue testLoop } if _, err := io.WriteString(tw, entry.contents); err != nil { t.Errorf("test %d, entry %d: Failed writing contents: %v", i, j, err) continue testLoop } } // Only interested in Close failures for the small tests. if err := tw.Close(); err != nil && !big { t.Errorf("test %d: Failed closing archive: %v", i, err) continue testLoop } actual := buf.Bytes() if !bytes.Equal(expected, actual) { t.Errorf("test %d: Incorrect result: (-=expected, +=actual)\n%v", i, bytediff(expected, actual)) } if testing.Short() { // The second test is expensive. break } } } func TestPax(t *testing.T) { // Create an archive with a large name fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") if err != nil { t.Fatalf("os.Stat: %v", err) } // Force a PAX long name to be written longName := strings.Repeat("ab", 100) contents := strings.Repeat(" ", int(hdr.Size)) hdr.Name = longName var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if _, err = writer.Write([]byte(contents)); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. 
reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if hdr.Name != longName { t.Fatal("Couldn't recover long file name") } } func TestPaxSymlink(t *testing.T) { // Create an archive with a large linkname fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") hdr.Typeflag = TypeSymlink if err != nil { t.Fatalf("os.Stat:1 %v", err) } // Force a PAX long linkname to be written longLinkname := strings.Repeat("1234567890/1234567890", 10) hdr.Linkname = longLinkname hdr.Size = 0 var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if hdr.Linkname != longLinkname { t.Fatal("Couldn't recover long link name") } } func TestPaxNonAscii(t *testing.T) { // Create an archive with non ascii. These should trigger a pax header // because pax headers have a defined utf-8 encoding. fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") if err != nil { t.Fatalf("os.Stat:1 %v", err) } // some sample data chineseFilename := "文件名" chineseGroupname := "組" chineseUsername := "用戶名" hdr.Name = chineseFilename hdr.Gname = chineseGroupname hdr.Uname = chineseUsername contents := strings.Repeat(" ", int(hdr.Size)) var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if _, err = writer.Write([]byte(contents)); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if hdr.Name != chineseFilename { t.Fatal("Couldn't recover unicode name") } if hdr.Gname != chineseGroupname { t.Fatal("Couldn't recover unicode group") } if hdr.Uname != chineseUsername { t.Fatal("Couldn't recover unicode user") } } func TestPaxXattrs(t *testing.T) { xattrs := map[string]string{ "user.key": "value", } // Create an archive with an xattr fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") if err != nil { t.Fatalf("os.Stat: %v", err) } contents := "Kilts" hdr.Xattrs = xattrs var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if _, err = writer.Write([]byte(contents)); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Test that we can get the xattrs back out of the archive. 
reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if !reflect.DeepEqual(hdr.Xattrs, xattrs) { t.Fatalf("xattrs did not survive round trip: got %+v, want %+v", hdr.Xattrs, xattrs) } } func TestPAXHeader(t *testing.T) { medName := strings.Repeat("CD", 50) longName := strings.Repeat("AB", 100) paxTests := [][2]string{ {paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"}, {"a=b", "6 a=b\n"}, // Single digit length {"a=names", "11 a=names\n"}, // Test case involving carries {paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)}, {paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}} for _, test := range paxTests { key, expected := test[0], test[1] if result := paxHeader(key); result != expected { t.Fatalf("paxHeader: got %s, expected %s", result, expected) } } } func TestUSTARLongName(t *testing.T) { // Create an archive with a path that failed to split with USTAR extension in previous versions. fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") hdr.Typeflag = TypeDir if err != nil { t.Fatalf("os.Stat:1 %v", err) } // Force a PAX long name to be written. The name was taken from a practical example // that fails and replaced ever char through numbers to anonymize the sample. longName := "/0000_0000000/00000-000000000/0000_0000000/00000-0000000000000/0000_0000000/00000-0000000-00000000/0000_0000000/00000000/0000_0000000/000/0000_0000000/00000000v00/0000_0000000/000000/0000_0000000/0000000/0000_0000000/00000y-00/0000/0000/00000000/0x000000/" hdr.Name = longName hdr.Size = 0 var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Test that we can get a long name back out of the archive. 
reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if hdr.Name != longName { t.Fatal("Couldn't recover long name") } } func TestValidTypeflagWithPAXHeader(t *testing.T) { var buffer bytes.Buffer tw := NewWriter(&buffer) fileName := strings.Repeat("ab", 100) hdr := &Header{ Name: fileName, Size: 4, Typeflag: 0, } if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("Failed to write header: %s", err) } if _, err := tw.Write([]byte("fooo")); err != nil { t.Fatalf("Failed to write the file's data: %s", err) } tw.Close() tr := NewReader(&buffer) for { header, err := tr.Next() if err == io.EOF { break } if err != nil { t.Fatalf("Failed to read header: %s", err) } if header.Typeflag != 0 { t.Fatalf("Typeflag should've been 0, found %d", header.Typeflag) } } } func TestWriteAfterClose(t *testing.T) { var buffer bytes.Buffer tw := NewWriter(&buffer) hdr := &Header{ Name: "small.txt", Size: 5, } if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("Failed to write header: %s", err) } tw.Close() if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose { t.Fatalf("Write: got %v; want ErrWriteAfterClose", err) } } tar-split-0.9.10/cmd/000077500000000000000000000000001260131227100142565ustar00rootroot00000000000000tar-split-0.9.10/cmd/tar-split/000077500000000000000000000000001260131227100161755ustar00rootroot00000000000000tar-split-0.9.10/cmd/tar-split/README.md000066400000000000000000000016501260131227100174560ustar00rootroot00000000000000# tar-split utility ## Installation go get -u github.com/vbatts/tar-split/cmd/tar-split ## Usage ### Disassembly ```bash $ sha256sum archive.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 archive.tar $ mkdir ./x $ tar-split disasm --output tar-data.json.gz ./archive.tar | tar -C ./x -x time="2015-07-20T15:45:04-04:00" level=info msg="created tar-data.json.gz from ./archive.tar (read 204800 bytes)" ``` ### Assembly ```bash $ tar-split asm --output new.tar --input ./tar-data.json.gz --path ./x/ INFO[0000] created new.tar from ./x/ and ./tar-data.json.gz (wrote 204800 bytes) $ sha256sum new.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar ``` ### Estimating metadata size ```bash $ tar-split checksize ./archive.tar inspecting "./archive.tar" (size 200k) -- number of files: 28 -- size of metadata uncompressed: 28k -- size of gzip compressed metadata: 1k ``` tar-split-0.9.10/cmd/tar-split/asm.go000066400000000000000000000026741260131227100173150ustar00rootroot00000000000000package main import ( "compress/gzip" "io" "os" "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) func CommandAsm(c *cli.Context) { if len(c.Args()) > 0 { logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) } if len(c.String("input")) == 0 { logrus.Fatalf("--input filename must be set") } if len(c.String("output")) == 0 { logrus.Fatalf("--output filename must be set ([FILENAME|-])") } if len(c.String("path")) == 0 { logrus.Fatalf("--path must be set") } var outputStream io.Writer if c.String("output") == "-" { outputStream = os.Stdout } else { fh, err := os.Create(c.String("output")) if err != nil { logrus.Fatal(err) } defer fh.Close() outputStream = fh } // Get the tar metadata reader mf, err := os.Open(c.String("input")) if err != nil { logrus.Fatal(err) } defer mf.Close() mfz, err := gzip.NewReader(mf) if err != nil { logrus.Fatal(err) } defer mfz.Close() metaUnpacker := storage.NewJSONUnpacker(mfz) // 
XXX maybe get the absolute path here fileGetter := storage.NewPathFileGetter(c.String("path")) ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) defer ots.Close() i, err := io.Copy(outputStream, ots) if err != nil { logrus.Fatal(err) } logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) } tar-split-0.9.10/cmd/tar-split/checksize.go000066400000000000000000000041541260131227100205000ustar00rootroot00000000000000package main import ( "archive/tar" "compress/gzip" "fmt" "io" "io/ioutil" "log" "os" "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) func CommandChecksize(c *cli.Context) { if len(c.Args()) == 0 { logrus.Fatalf("please specify tar archives to check ('-' will check stdin)") } for _, arg := range c.Args() { fh, err := os.Open(arg) if err != nil { log.Fatal(err) } defer fh.Close() fi, err := fh.Stat() if err != nil { log.Fatal(err) } fmt.Printf("inspecting %q (size %dk)\n", fh.Name(), fi.Size()/1024) packFh, err := ioutil.TempFile("", "packed.") if err != nil { log.Fatal(err) } defer packFh.Close() if !c.Bool("work") { defer os.Remove(packFh.Name()) } else { fmt.Printf(" -- working file preserved: %s\n", packFh.Name()) } sp := storage.NewJSONPacker(packFh) fp := storage.NewDiscardFilePutter() dissam, err := asm.NewInputTarStream(fh, sp, fp) if err != nil { log.Fatal(err) } var num int tr := tar.NewReader(dissam) for { _, err = tr.Next() if err != nil { if err == io.EOF { break } log.Fatal(err) } num++ if _, err := io.Copy(ioutil.Discard, tr); err != nil { log.Fatal(err) } } fmt.Printf(" -- number of files: %d\n", num) if err := packFh.Sync(); err != nil { log.Fatal(err) } fi, err = packFh.Stat() if err != nil { log.Fatal(err) } fmt.Printf(" -- size of metadata uncompressed: %dk\n", fi.Size()/1024) gzPackFh, err := ioutil.TempFile("", "packed.gz.") if err != nil { log.Fatal(err) } defer gzPackFh.Close() if !c.Bool("work") { defer os.Remove(gzPackFh.Name()) } gzWrtr := gzip.NewWriter(gzPackFh) if _, err := packFh.Seek(0, 0); err != nil { log.Fatal(err) } if _, err := io.Copy(gzWrtr, packFh); err != nil { log.Fatal(err) } gzWrtr.Close() if err := gzPackFh.Sync(); err != nil { log.Fatal(err) } fi, err = gzPackFh.Stat() if err != nil { log.Fatal(err) } fmt.Printf(" -- size of gzip compressed metadata: %dk\n", fi.Size()/1024) } } tar-split-0.9.10/cmd/tar-split/disasm.go000066400000000000000000000024431260131227100200070ustar00rootroot00000000000000package main import ( "compress/gzip" "io" "os" "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) func CommandDisasm(c *cli.Context) { if len(c.Args()) != 1 { logrus.Fatalf("please specify tar to be disabled ") } if len(c.String("output")) == 0 { logrus.Fatalf("--output filename must be set") } // Set up the tar input stream var inputStream io.Reader if c.Args()[0] == "-" { inputStream = os.Stdin } else { fh, err := os.Open(c.Args()[0]) if err != nil { logrus.Fatal(err) } defer fh.Close() inputStream = fh } // Set up the metadata storage mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) if err != nil { logrus.Fatal(err) } defer mf.Close() mfz := gzip.NewWriter(mf) defer mfz.Close() metaPacker := storage.NewJSONPacker(mfz) // we're passing nil here for the file putter, because the ApplyDiff will // handle the extraction of the archive its, err := 
asm.NewInputTarStream(inputStream, metaPacker, nil) if err != nil { logrus.Fatal(err) } i, err := io.Copy(os.Stdout, its) if err != nil { logrus.Fatal(err) } logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) } tar-split-0.9.10/cmd/tar-split/main.go000066400000000000000000000034251260131227100174540ustar00rootroot00000000000000package main import ( "os" "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" "github.com/vbatts/tar-split/version" ) func main() { app := cli.NewApp() app.Name = "tar-split" app.Usage = "tar assembly and disassembly utility" app.Version = version.VERSION app.Author = "Vincent Batts" app.Email = "vbatts@hashbangbash.com" app.Action = cli.ShowAppHelp app.Before = func(c *cli.Context) error { logrus.SetOutput(os.Stderr) if c.Bool("debug") { logrus.SetLevel(logrus.DebugLevel) } return nil } app.Flags = []cli.Flag{ cli.BoolFlag{ Name: "debug, D", Usage: "debug output", // defaults to false }, } app.Commands = []cli.Command{ { Name: "disasm", Aliases: []string{"d"}, Usage: "disassemble the input tar stream", Action: CommandDisasm, Flags: []cli.Flag{ cli.StringFlag{ Name: "output", Value: "tar-data.json.gz", Usage: "output of disassembled tar stream", }, }, }, { Name: "asm", Aliases: []string{"a"}, Usage: "assemble tar stream", Action: CommandAsm, Flags: []cli.Flag{ cli.StringFlag{ Name: "input", Value: "tar-data.json.gz", Usage: "input of disassembled tar stream", }, cli.StringFlag{ Name: "output", Value: "-", Usage: "reassembled tar archive", }, cli.StringFlag{ Name: "path", Value: "", Usage: "relative path of extracted tar", }, }, }, { Name: "checksize", Usage: "displays size estimates for metadata storage of a Tar archive", Action: CommandChecksize, Flags: []cli.Flag{ cli.BoolFlag{ Name: "work", Usage: "do not delete the working directory", // defaults to false }, }, }, } if err := app.Run(os.Args); err != nil { logrus.Fatal(err) } } tar-split-0.9.10/concept/000077500000000000000000000000001260131227100151465ustar00rootroot00000000000000tar-split-0.9.10/concept/DESIGN.md000066400000000000000000000061571260131227100164520ustar00rootroot00000000000000# Flow of TAR stream ## `./archive/tar` The import path `github.com/vbatts/tar-split/archive/tar` is fork of upstream golang stdlib [`archive/tar`](http://golang.org/pkg/archive/tar/). It adds plumbing to access raw bytes of the tar stream as the headers and payload are read. ## Packer interface For ease of storage and usage of the raw bytes, there will be a storage interface, that accepts an io.Writer (This way you could pass it an in memory buffer or a file handle). Having a Packer interface can allow configuration of hash.Hash for file payloads and providing your own io.Writer. Instead of having a state directory to store all the header information for all Readers, we will leave that up to user of Reader. Because we can not assume an ID for each Reader, and keeping that information differentiated. ## State Directory Perhaps we could deduplicate the header info, by hashing the rawbytes and storing them in a directory tree like: ./ac/dc/beef Then reference the hash of the header info, in the positional records for the tar stream. Though this could be a future feature, and not required for an initial implementation. Also, this would imply an owned state directory, rather than just writing storage info to an io.Writer. ## Concept Example First we'll get an archive to work with. 
For repeatability, we'll make an archive from what you've just cloned: ``` git archive --format=tar -o tar-split.tar HEAD . ``` Then build the example main.go: ``` go build ./main.go ``` Now run the example over the archive: ``` $ ./main tar-split.tar 2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out" pax_global_header pre: 512 read: 52 .travis.yml pre: 972 read: 374 DESIGN.md pre: 650 read: 1131 LICENSE pre: 917 read: 1075 README.md pre: 973 read: 4289 archive/ pre: 831 read: 0 archive/tar/ pre: 512 read: 0 archive/tar/common.go pre: 512 read: 7790 [...] tar/storage/entry_test.go pre: 667 read: 1137 tar/storage/getter.go pre: 911 read: 2741 tar/storage/getter_test.go pre: 843 read: 1491 tar/storage/packer.go pre: 557 read: 3141 tar/storage/packer_test.go pre: 955 read: 3096 EOF padding: 1512 Remainder: 512 Size: 215040; Sum: 215040 ``` *What are we seeing here?* * `pre` is the header of a file entry, and potentially the padding from the end of the prior file's payload. Also with particular tar extensions and pax attributes, the header can exceed 512 bytes. * `read` is the size of the file payload from the entry * `EOF padding` is the expected 1024 null bytes on the end of a tar archive, plus potential padding from the end of the prior file entry's payload * `Remainder` is the remaining bytes of an archive. This is typically deadspace as most tar implmentations will return after having reached the end of the 1024 null bytes. Though various implementations will include some amount of bytes here, which will affect the checksum of the resulting tar archive, therefore this must be accounted for as well. Ideally the input tar and output `*.out`, will match: ``` $ sha1sum tar-split.tar* ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out ``` tar-split-0.9.10/concept/main.go000066400000000000000000000036421260131227100164260ustar00rootroot00000000000000// +build ignore package main import ( "flag" "fmt" "io" "io/ioutil" "log" "os" "github.com/vbatts/tar-split/archive/tar" ) func main() { flag.Parse() log.SetOutput(os.Stderr) for _, arg := range flag.Args() { func() { // Open the tar archive fh, err := os.Open(arg) if err != nil { log.Fatal(err, arg) } defer fh.Close() output, err := os.Create(fmt.Sprintf("%s.out", arg)) if err != nil { log.Fatal(err) } defer output.Close() log.Printf("writing %q to %q", fh.Name(), output.Name()) fi, err := fh.Stat() if err != nil { log.Fatal(err, fh.Name()) } size := fi.Size() var sum int64 tr := tar.NewReader(fh) tr.RawAccounting = true for { hdr, err := tr.Next() if err != nil { if err != io.EOF { log.Println(err) } // even when an EOF is reached, there is often 1024 null bytes on // the end of an archive. Collect them too. 
post := tr.RawBytes() output.Write(post) sum += int64(len(post)) fmt.Printf("EOF padding: %d\n", len(post)) break } pre := tr.RawBytes() output.Write(pre) sum += int64(len(pre)) var i int64 if i, err = io.Copy(output, tr); err != nil { log.Println(err) break } sum += i fmt.Println(hdr.Name, "pre:", len(pre), "read:", i) } // it is allowable, and not uncommon, that there is further padding on the // end of an archive, apart from the expected 1024 null bytes remainder, err := ioutil.ReadAll(fh) if err != nil && err != io.EOF { log.Fatal(err, fh.Name()) } output.Write(remainder) sum += int64(len(remainder)) fmt.Printf("Remainder: %d\n", len(remainder)) if size != sum { fmt.Printf("Size: %d; Sum: %d; Diff: %d\n", size, sum, size-sum) fmt.Printf("Compare like `cmp -bl %s %s | less`\n", fh.Name(), output.Name()) } else { fmt.Printf("Size: %d; Sum: %d\n", size, sum) } }() } } tar-split-0.9.10/tar/000077500000000000000000000000001260131227100143015ustar00rootroot00000000000000tar-split-0.9.10/tar/asm/000077500000000000000000000000001260131227100150615ustar00rootroot00000000000000tar-split-0.9.10/tar/asm/README.md000066400000000000000000000030121260131227100163460ustar00rootroot00000000000000asm === This library is for assembly and disassembly of tar archives, facilitated by `github.com/vbatts/tar-split/tar/storage`. Concerns -------- For completely safe assembly/disassembly, there will need to be a Content Addressable Storage (CAS) directory that maps to a checksum in the `storage.Entry` of `storage.FileType`. This is due to the fact that tar archives _can_ allow multiple records for the same path, but the last one effectively wins, even if the prior records had a different payload. Without a CAS, when assembling an archive from relative paths, if the archive has multiple entries for the same path, all payloads read back in from that relative path would be identical. Thoughts -------- Have a look-aside directory or storage. This way when a clobbering record is encountered from the tar stream, the payload of the prior/existing file is stored to the CAS. This way the clobbering record's file payload can be extracted, but we'll have preserved the payload needed to reassemble a precise tar archive. clobbered/path/to/file.[0-N] *Alternatively*, we could just _not_ support tar streams that have clobbering file paths. Appending records to the archive is not incredibly common, and doesn't happen by default for most implementations. Not supporting them wouldn't be a security concern either, as, if it did occur, we would reassemble an archive that doesn't validate its signature/checksum, so it shouldn't be trusted anyway. Otherwise, this will allow us to defer support for appended files as a FUTURE FEATURE. tar-split-0.9.10/tar/asm/assemble.go000066400000000000000000000032641260131227100172100ustar00rootroot00000000000000package asm import ( "bytes" "fmt" "hash/crc64" "io" "github.com/vbatts/tar-split/tar/storage" ) // NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive // stream. // // It takes a storage.FileGetter, for mapping the file payloads that are to be read in, // and a storage.Unpacker, which has access to the raw bytes and file order // metadata. With the combination of these two items, a precise assembled Tar // archive is possible. func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser { // ...
Since these are interfaces, this is possible, so let's not have a nil pointer if fg == nil || up == nil { return nil } pr, pw := io.Pipe() go func() { for { entry, err := up.Next() if err != nil { pw.CloseWithError(err) return } switch entry.Type { case storage.SegmentType: if _, err := pw.Write(entry.Payload); err != nil { pw.CloseWithError(err) return } case storage.FileType: if entry.Size == 0 { continue } fh, err := fg.Get(entry.GetName()) if err != nil { pw.CloseWithError(err) return } c := crc64.New(storage.CRCTable) tRdr := io.TeeReader(fh, c) if _, err := io.Copy(pw, tRdr); err != nil { fh.Close() pw.CloseWithError(err) return } if !bytes.Equal(c.Sum(nil), entry.Payload) { // I would rather this be a comparable ErrInvalidChecksum or such, // but since it's coming through the PipeReader, the context of // _which_ file would be lost... fh.Close() pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName())) return } fh.Close() } } }() return pr } tar-split-0.9.10/tar/asm/assemble_test.go000066400000000000000000000117441260131227100202510ustar00rootroot00000000000000package asm import ( "bytes" "compress/gzip" "crypto/sha1" "fmt" "hash/crc64" "io" "io/ioutil" "os" "testing" "github.com/vbatts/tar-split/tar/storage" ) var entries = []struct { Entry storage.Entry Body []byte }{ { Entry: storage.Entry{ Type: storage.FileType, Name: "./hurr.txt", Payload: []byte{2, 116, 164, 177, 171, 236, 107, 78}, Size: 20, }, Body: []byte("imma hurr til I derp"), }, { Entry: storage.Entry{ Type: storage.FileType, Name: "./ermahgerd.txt", Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, Body: []byte("café con leche, por favor"), }, { Entry: storage.Entry{ Type: storage.FileType, NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip. 
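// (Same body as ./ermahgerd.txt above, hence the identical crc64 checksum in Payload.)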
Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, Body: []byte("café con leche, por favor"), }, } var entriesMangled = []struct { Entry storage.Entry Body []byte }{ { Entry: storage.Entry{ Type: storage.FileType, Name: "./hurr.txt", Payload: []byte{3, 116, 164, 177, 171, 236, 107, 78}, Size: 20, }, // switch Body: []byte("imma derp til I hurr"), }, { Entry: storage.Entry{ Type: storage.FileType, Name: "./ermahgerd.txt", Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, // san not con Body: []byte("café sans leche, por favor"), }, { Entry: storage.Entry{ Type: storage.FileType, NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, Body: []byte("café con leche, por favor"), }, } func TestTarStreamMangledGetterPutter(t *testing.T) { fgp := storage.NewBufferFileGetPutter() // first lets prep a GetPutter and Packer for i := range entries { if entries[i].Entry.Type == storage.FileType { j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body)) if err != nil { t.Error(err) } if j != entries[i].Entry.Size { t.Errorf("size %q: expected %d; got %d", entries[i].Entry.GetName(), entries[i].Entry.Size, j) } if !bytes.Equal(csum, entries[i].Entry.Payload) { t.Errorf("checksum %q: expected %v; got %v", entries[i].Entry.GetName(), entries[i].Entry.Payload, csum) } } } for _, e := range entriesMangled { if e.Entry.Type == storage.FileType { rdr, err := fgp.Get(e.Entry.GetName()) if err != nil { t.Error(err) } c := crc64.New(storage.CRCTable) i, err := io.Copy(c, rdr) if err != nil { t.Fatal(err) } rdr.Close() csum := c.Sum(nil) if bytes.Equal(csum, e.Entry.Payload) { t.Errorf("wrote %d bytes. checksum for %q should not have matched! %v", i, e.Entry.GetName(), csum) } } } } func TestTarStream(t *testing.T) { testCases := []struct { path string expectedSHA1Sum string expectedSize int64 }{ {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, } for _, tc := range testCases { fh, err := os.Open(tc.path) if err != nil { t.Fatal(err) } defer fh.Close() gzRdr, err := gzip.NewReader(fh) if err != nil { t.Fatal(err) } defer gzRdr.Close() // Setup where we'll store the metadata w := bytes.NewBuffer([]byte{}) sp := storage.NewJSONPacker(w) fgp := storage.NewBufferFileGetPutter() // wrap the disassembly stream tarStream, err := NewInputTarStream(gzRdr, sp, fgp) if err != nil { t.Fatal(err) } // get a sum of the stream after it has passed through to ensure it's the same. h0 := sha1.New() tRdr0 := io.TeeReader(tarStream, h0) // read it all to the bit bucket i, err := io.Copy(ioutil.Discard, tRdr0) if err != nil { t.Fatal(err) } if i != tc.expectedSize { t.Errorf("size of tar: expected %d; got %d", tc.expectedSize, i) } if fmt.Sprintf("%x", h0.Sum(nil)) != tc.expectedSHA1Sum { t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil)) } //t.Logf("%s", w.String()) // if we fail, then show the packed info // If we've made it this far, then we'll turn it around and create a tar // stream from the packed metadata and buffered file contents. r := bytes.NewBuffer(w.Bytes()) sup := storage.NewJSONUnpacker(r) // and reuse the fgp that we Put the payloads to. 
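// (Segments are replayed from the packed JSON metadata, and file payloads are fetched back out of the store by name; that round trip is what lets the output stream be byte-for-byte identical to the input.)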
rc := NewOutputTarStream(fgp, sup) h1 := sha1.New() i, err = io.Copy(h1, rc) if err != nil { t.Fatal(err) } if i != tc.expectedSize { t.Errorf("size of output tar: expected %d; got %d", tc.expectedSize, i) } if fmt.Sprintf("%x", h1.Sum(nil)) != tc.expectedSHA1Sum { t.Fatalf("checksum of output tar: expected %s; got %x", tc.expectedSHA1Sum, h1.Sum(nil)) } } } tar-split-0.9.10/tar/asm/disassemble.go000066400000000000000000000074111260131227100177060ustar00rootroot00000000000000package asm import ( "io" "io/ioutil" "github.com/vbatts/tar-split/archive/tar" "github.com/vbatts/tar-split/tar/storage" ) // NewInputTarStream wraps the Reader stream of a tar archive and provides a // Reader stream of the same. // // In the middle it will pack the segments and file metadata to storage.Packer // `p`. // // The storage.FilePutter is where payloads of files in the stream are // stashed. If this stashing is not needed, you can provide a nil // storage.FilePutter. Since the checksumming is still needed, a default // of NewDiscardFilePutter will be used internally. func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) { // What to do here... folks will want their own access to the Reader that is // their tar archive stream, but we'll need that same stream to use our // forked 'archive/tar'. // Perhaps do an io.TeeReader that hands back an io.Reader for them to read // from, and we'll MITM the stream to store metadata. // We'll need a storage.FilePutter too ... // Another concern is whether to do any storage.FilePutter operations, such that we // don't extract any amount of the archive. But then again, we're not making // files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter. // Perhaps we have a DiscardFilePutter that is a bit bucket. // we'll return the pipe reader, since TeeReader does not buffer and will // only read what the outputRdr reads. Since Tar archives have padding on // the end, we want to be the one reading the padding, even if the user's // `archive/tar` doesn't care. pR, pW := io.Pipe() outputRdr := io.TeeReader(r, pW) // we need a putter that will generate the crc64 sums of file payloads if fp == nil { fp = storage.NewDiscardFilePutter() } go func() { tr := tar.NewReader(outputRdr) tr.RawAccounting = true for { hdr, err := tr.Next() if err != nil { if err != io.EOF { pW.CloseWithError(err) return } // even when an EOF is reached, there are often 1024 null bytes on // the end of an archive. Collect them too. if b := tr.RawBytes(); len(b) > 0 { _, err := p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: b, }) if err != nil { pW.CloseWithError(err) return } } break // not return. We need the end of the reader. } if hdr == nil { break // not return. We need the end of the reader.
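// (Breaking, rather than returning, matters for both EOF paths: the ioutil.ReadAll further below still needs to drain any trailing bytes from outputRdr so the final remainder segment can be recorded.)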
} if b := tr.RawBytes(); len(b) > 0 { _, err := p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: b, }) if err != nil { pW.CloseWithError(err) return } } var csum []byte if hdr.Size > 0 { var err error _, csum, err = fp.Put(hdr.Name, tr) if err != nil { pW.CloseWithError(err) return } } entry := storage.Entry{ Type: storage.FileType, Size: hdr.Size, Payload: csum, } // For proper marshalling of non-utf8 characters entry.SetName(hdr.Name) // File entries added, regardless of size _, err = p.AddEntry(entry) if err != nil { pW.CloseWithError(err) return } if b := tr.RawBytes(); len(b) > 0 { _, err = p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: b, }) if err != nil { pW.CloseWithError(err) return } } } // it is allowable, and not uncommon, that there is further padding on the // end of an archive, apart from the expected 1024 null bytes. remainder, err := ioutil.ReadAll(outputRdr) if err != nil && err != io.EOF { pW.CloseWithError(err) return } _, err = p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: remainder, }) if err != nil { pW.CloseWithError(err) return } pW.Close() }() return pR, nil } tar-split-0.9.10/tar/asm/doc.go000066400000000000000000000004301260131227100161520ustar00rootroot00000000000000/* Package asm provides the API for streaming assembly and disassembly of tar archives. It uses `github.com/vbatts/tar-split/tar/storage` for Packing/Unpacking the metadata for a stream, as well as an implementation of Getting/Putting the file entries' payload. */ package asm tar-split-0.9.10/tar/asm/testdata/000077500000000000000000000000001260131227100166725ustar00rootroot00000000000000tar-split-0.9.10/tar/asm/testdata/fatlonglink.tar.gz000066400000000000000000000634421260131227100223420ustar00rootroot00000000000000[binary gzip payload omitted] tar-split-0.9.10/tar/asm/testdata/iso-8859.tar.gz000066400000000000000000000002731260131227100212300ustar00rootroot00000000000000[binary gzip payload omitted] tar-split-0.9.10/tar/asm/testdata/longlink.tar.gz000066400000000000000000000006661260131227100216460ustar00rootroot00000000000000[binary gzip payload omitted, along with the t.tar.gz fixture and the opening of tar-split-0.9.10/tar/storage/entry.go] ... Only Size > 0 will get // read into a resulting output stream (due to hardlinks). FileType Type = 1 + iota // SegmentType represents a raw bytes segment from the archive stream. These raw // byte segments consist of the raw headers and various padding. // // Its payload is to be marshalled base64 encoded. SegmentType ) // Entry is the structure for packing and unpacking the information read from // the Tar archive. // // FileType Payload checksum is using `hash/crc64` for basic file integrity, // _not_ for cryptography. // From http://www.backplane.com/matt/crc64.html, CRC32 has almost 40,000 // collisions in a sample of 18.2 million, CRC64 had none. type Entry struct { Type Type `json:"type"` Name string `json:"name,omitempty"` NameRaw []byte `json:"name_raw,omitempty"` Size int64 `json:"size,omitempty"` Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; Position int `json:"position"` } // SetName will check name for valid UTF-8 string, and set the appropriate // field.
See https://github.com/vbatts/tar-split/issues/17 func (e *Entry) SetName(name string) { if utf8.ValidString(name) { e.Name = name } else { e.NameRaw = []byte(name) } } // SetNameBytes will check name for valid UTF-8 string, and set the appropriate // field func (e *Entry) SetNameBytes(name []byte) { if utf8.Valid(name) { e.Name = string(name) } else { e.NameRaw = name } } // GetName returns the string for the entry's name, regardless of the field stored in func (e *Entry) GetName() string { if len(e.NameRaw) > 0 { return string(e.NameRaw) } return e.Name } // GetNameBytes returns the bytes for the entry's name, regardless of the field stored in func (e *Entry) GetNameBytes() []byte { if len(e.NameRaw) > 0 { return e.NameRaw } return []byte(e.Name) } tar-split-0.9.10/tar/storage/entry_test.go000066400000000000000000000034041260131227100204750ustar00rootroot00000000000000package storage import ( "encoding/json" "sort" "testing" ) func TestEntries(t *testing.T) { e := Entries{ Entry{ Type: SegmentType, Payload: []byte("y'all"), Position: 1, }, Entry{ Type: SegmentType, Payload: []byte("doin"), Position: 3, }, Entry{ Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), Position: 2, }, Entry{ Type: SegmentType, Payload: []byte("how"), Position: 0, }, } sort.Sort(e) if e[0].Position != 0 { t.Errorf("expected Position 0, but got %d", e[0].Position) } } func TestFile(t *testing.T) { f := Entry{ Type: FileType, Size: 100, Position: 2, } f.SetName("./hello.txt") buf, err := json.Marshal(f) if err != nil { t.Fatal(err) } f1 := Entry{} if err = json.Unmarshal(buf, &f1); err != nil { t.Fatal(err) } if f.GetName() != f1.GetName() { t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) } if f.Size != f1.Size { t.Errorf("expected Size %q, got %q", f.Size, f1.Size) } if f.Position != f1.Position { t.Errorf("expected Position %q, got %q", f.Position, f1.Position) } } func TestFileRaw(t *testing.T) { f := Entry{ Type: FileType, Size: 100, Position: 2, } f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74}) buf, err := json.Marshal(f) if err != nil { t.Fatal(err) } f1 := Entry{} if err = json.Unmarshal(buf, &f1); err != nil { t.Fatal(err) } if f.GetName() != f1.GetName() { t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) } if f.Size != f1.Size { t.Errorf("expected Size %q, got %q", f.Size, f1.Size) } if f.Position != f1.Position { t.Errorf("expected Position %q, got %q", f.Position, f1.Position) } } tar-split-0.9.10/tar/storage/getter.go000066400000000000000000000051071260131227100175710ustar00rootroot00000000000000package storage import ( "bytes" "errors" "hash/crc64" "io" "os" "path/filepath" ) // FileGetter is the interface for getting a stream of a file payload, // addressed by name/filename. Presumably, the names will be scoped to relative // file paths. type FileGetter interface { // Get returns a stream for the provided file path Get(filename string) (output io.ReadCloser, err error) } // FilePutter is the interface for storing a stream of a file payload, // addressed by name/filename. type FilePutter interface { // Put returns the size of the stream received, and the crc64 checksum for // the provided stream Put(filename string, input io.Reader) (size int64, checksum []byte, err error) } // FileGetPutter is the interface that groups both Getting and Putting file // payloads. type FileGetPutter interface { FileGetter FilePutter } // NewPathFileGetter returns a FileGetter that is for files relative to path // relpath. 
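// Note that Get simply joins the requested name onto relpath with filepath.Join; it is expected to be handed the same relative names that were recorded at disassembly time.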
func NewPathFileGetter(relpath string) FileGetter { return &pathFileGetter{root: relpath} } type pathFileGetter struct { root string } func (pfg pathFileGetter) Get(filename string) (io.ReadCloser, error) { return os.Open(filepath.Join(pfg.root, filename)) } type bufferFileGetPutter struct { files map[string][]byte } func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) { if _, ok := bfgp.files[name]; !ok { return nil, errors.New("no such file") } b := bytes.NewBuffer(bfgp.files[name]) return &readCloserWrapper{b}, nil } func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) { crc := crc64.New(CRCTable) buf := bytes.NewBuffer(nil) cw := io.MultiWriter(crc, buf) i, err := io.Copy(cw, r) if err != nil { return 0, nil, err } bfgp.files[name] = buf.Bytes() return i, crc.Sum(nil), nil } type readCloserWrapper struct { io.Reader } func (w *readCloserWrapper) Close() error { return nil } // NewBufferFileGetPutter is a simple in-memory FileGetPutter // // Implication is this is memory intensive... // Probably best for testing or light weight cases. func NewBufferFileGetPutter() FileGetPutter { return &bufferFileGetPutter{ files: map[string][]byte{}, } } // NewDiscardFilePutter is a bit bucket FilePutter func NewDiscardFilePutter() FilePutter { return &bitBucketFilePutter{} } type bitBucketFilePutter struct { } func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) { c := crc64.New(CRCTable) i, err := io.Copy(c, r) return i, c.Sum(nil), err } // CRCTable is the default table used for crc64 sum calculations var CRCTable = crc64.MakeTable(crc64.ISO) tar-split-0.9.10/tar/storage/getter_test.go000066400000000000000000000036751260131227100206400ustar00rootroot00000000000000package storage import ( "bytes" "fmt" "io/ioutil" "strings" "testing" ) func TestGetter(t *testing.T) { fgp := NewBufferFileGetPutter() files := map[string]map[string][]byte{ "file1.txt": {"foo": []byte{60, 60, 48, 48, 0, 0, 0, 0}}, "file2.txt": {"bar": []byte{45, 196, 22, 240, 0, 0, 0, 0}}, } for n, b := range files { for body, sum := range b { _, csum, err := fgp.Put(n, bytes.NewBufferString(body)) if err != nil { t.Error(err) } if !bytes.Equal(csum, sum) { t.Errorf("checksum: expected 0x%x; got 0x%x", sum, csum) } } } for n, b := range files { for body := range b { r, err := fgp.Get(n) if err != nil { t.Error(err) } buf, err := ioutil.ReadAll(r) if err != nil { t.Error(err) } if body != string(buf) { t.Errorf("expected %q, got %q", body, string(buf)) } } } } func TestPutter(t *testing.T) { fp := NewDiscardFilePutter() // map[filename]map[body]crc64sum files := map[string]map[string][]byte{ "file1.txt": {"foo": []byte{60, 60, 48, 48, 0, 0, 0, 0}}, "file2.txt": {"bar": []byte{45, 196, 22, 240, 0, 0, 0, 0}}, "file3.txt": {"baz": []byte{32, 68, 22, 240, 0, 0, 0, 0}}, "file4.txt": {"bif": []byte{48, 9, 150, 240, 0, 0, 0, 0}}, } for n, b := range files { for body, sum := range b { _, csum, err := fp.Put(n, bytes.NewBufferString(body)) if err != nil { t.Error(err) } if !bytes.Equal(csum, sum) { t.Errorf("checksum on %q: expected %v; got %v", n, sum, csum) } } } } func BenchmarkPutter(b *testing.B) { files := []string{ strings.Repeat("foo", 1000), strings.Repeat("bar", 1000), strings.Repeat("baz", 1000), strings.Repeat("fooz", 1000), strings.Repeat("vbatts", 1000), strings.Repeat("systemd", 1000), } for i := 0; i < b.N; i++ { fgp := NewBufferFileGetPutter() for n, body := range files { if _, _, err := fgp.Put(fmt.Sprintf("%d", n), bytes.NewBufferString(body)); 
err != nil { b.Fatal(err) } } } } tar-split-0.9.10/tar/storage/packer.go000066400000000000000000000062271260131227100175500ustar00rootroot00000000000000package storage import ( "bufio" "encoding/json" "errors" "io" "path/filepath" "unicode/utf8" ) // ErrDuplicatePath occurs when a tar archive has more than one entry for the // same file path var ErrDuplicatePath = errors.New("duplicates of file paths not supported") // Packer describes the methods to pack Entries to a storage destination type Packer interface { // AddEntry packs the Entry and returns its position AddEntry(e Entry) (int, error) } // Unpacker describes the methods to read Entries from a source type Unpacker interface { // Next returns the next Entry being unpacked, or error, until io.EOF Next() (*Entry, error) } /* TODO(vbatts) figure out a good model for this type PackUnpacker interface { Packer Unpacker } */ type jsonUnpacker struct { r io.Reader b *bufio.Reader isEOF bool seen seenNames } func (jup *jsonUnpacker) Next() (*Entry, error) { var e Entry if jup.isEOF { // since ReadBytes() will return read bytes AND an EOF, we handle it this // roundabout way so we can Unmarshal the tail with relevant errors, but // still get an io.EOF when the stream is ended. return nil, io.EOF } line, err := jup.b.ReadBytes('\n') if err != nil && err != io.EOF { return nil, err } else if err == io.EOF { jup.isEOF = true } err = json.Unmarshal(line, &e) if err != nil && jup.isEOF { // if the remainder actually _wasn't_ a remaining json structure, then just EOF return nil, io.EOF } // check for dup name if e.Type == FileType { cName := filepath.Clean(e.GetName()) if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } jup.seen[cName] = struct{}{} } return &e, err } // NewJSONUnpacker provides an Unpacker that reads Entries (SegmentType and // FileType) as a json document. // // Each Entry read is expected to be delimited by a new line. func NewJSONUnpacker(r io.Reader) Unpacker { return &jsonUnpacker{ r: r, b: bufio.NewReader(r), seen: seenNames{}, } } type jsonPacker struct { w io.Writer e *json.Encoder pos int seen seenNames } type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { // if Name is not valid utf8, switch it to raw first. if e.Name != "" { if !utf8.ValidString(e.Name) { e.NameRaw = []byte(e.Name) e.Name = "" } } // check early for dup name if e.Type == FileType { cName := filepath.Clean(e.GetName()) if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath } jp.seen[cName] = struct{}{} } e.Position = jp.pos err := jp.e.Encode(e) if err != nil { return -1, err } // made it this far, increment now jp.pos++ return e.Position, nil } // NewJSONPacker provides a Packer that writes each Entry (SegmentType and // FileType) as a json document. // // The Entries are delimited by a new line.
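// An illustrative packed stream, one JSON document per line (values here are
// mocked up; []byte payloads marshal as base64, and Type is 1 for files, 2 for segments):
//
//	{"type":2,"payload":"aGVhZGVyIGJ5dGVzLi4u","position":0}
//	{"type":1,"name":"./hurr.txt","size":20,"payload":"AnSksavsa04=","position":1}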
func NewJSONPacker(w io.Writer) Packer { return &jsonPacker{ w: w, e: json.NewEncoder(w), seen: seenNames{}, } } /* TODO(vbatts) perhaps have a more compact packer/unpacker, maybe using msgpack (https://github.com/ugorji/go) Even so, since our jsonUnpacker and jsonPacker just take io.Reader/io.Writer, we can get away with passing them a gzip.Reader/gzip.Writer */ tar-split-0.9.10/tar/storage/packer_test.go000066400000000000000000000060301260131227100205770ustar00rootroot00000000000000package storage import ( "bytes" "compress/gzip" "io" "testing" ) func TestDuplicateFail(t *testing.T) { e := []Entry{ Entry{ Type: FileType, Name: "./hurr.txt", Payload: []byte("abcde"), }, Entry{ Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, Entry{ Type: FileType, Name: "hurr.txt", // slightly different path, same file though Payload: []byte("deadbeef"), }, } buf := []byte{} b := bytes.NewBuffer(buf) jp := NewJSONPacker(b) if _, err := jp.AddEntry(e[0]); err != nil { t.Error(err) } if _, err := jp.AddEntry(e[1]); err != ErrDuplicatePath { t.Errorf("expected failure on duplicate path") } if _, err := jp.AddEntry(e[2]); err != ErrDuplicatePath { t.Errorf("expected failure on duplicate path") } } func TestJSONPackerUnpacker(t *testing.T) { e := []Entry{ Entry{ Type: SegmentType, Payload: []byte("how"), }, Entry{ Type: SegmentType, Payload: []byte("y'all"), }, Entry{ Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, Entry{ Type: SegmentType, Payload: []byte("doin"), }, } buf := []byte{} b := bytes.NewBuffer(buf) func() { jp := NewJSONPacker(b) for i := range e { if _, err := jp.AddEntry(e[i]); err != nil { t.Error(err) } } }() // >> packer_test.go:43: uncompressed: 266 //t.Errorf("uncompressed: %d", len(b.Bytes())) b = bytes.NewBuffer(b.Bytes()) entries := Entries{} func() { jup := NewJSONUnpacker(b) for { entry, err := jup.Next() if err != nil { if err == io.EOF { break } t.Error(err) } entries = append(entries, *entry) t.Logf("got %#v", entry) } }() if len(entries) != len(e) { t.Errorf("expected %d entries, got %d", len(e), len(entries)) } } // you can use a compressing Reader/Writer and make nice savings. // // For these two tests, which use the same set, it is the difference of 266 // bytes uncompressed vs 138 bytes compressed.
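// (Nothing in the packer or unpacker is compression-aware: gzip.Writer and gzip.Reader satisfy io.Writer/io.Reader, so they slot straight in, as the round trip below shows.)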
func TestGzip(t *testing.T) { e := []Entry{ Entry{ Type: SegmentType, Payload: []byte("how"), }, Entry{ Type: SegmentType, Payload: []byte("y'all"), }, Entry{ Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, Entry{ Type: SegmentType, Payload: []byte("doin"), }, } buf := []byte{} b := bytes.NewBuffer(buf) gzW := gzip.NewWriter(b) jp := NewJSONPacker(gzW) for i := range e { if _, err := jp.AddEntry(e[i]); err != nil { t.Error(err) } } gzW.Close() // >> packer_test.go:99: compressed: 138 //t.Errorf("compressed: %d", len(b.Bytes())) b = bytes.NewBuffer(b.Bytes()) gzR, err := gzip.NewReader(b) if err != nil { t.Fatal(err) } entries := Entries{} func() { jup := NewJSONUnpacker(gzR) for { entry, err := jup.Next() if err != nil { if err == io.EOF { break } t.Error(err) } entries = append(entries, *entry) t.Logf("got %#v", entry) } }() if len(entries) != len(e) { t.Errorf("expected %d entries, got %d", len(e), len(entries)) } } tar-split-0.9.10/version/000077500000000000000000000000001260131227100152005ustar00rootroot00000000000000tar-split-0.9.10/version/gen.go000066400000000000000000000002261260131227100163000ustar00rootroot00000000000000package version // from `go get github.com/vbatts/go-get-version` //go:generate go-get-version -package version -variable VERSION -output version.go tar-split-0.9.10/version/version.go000066400000000000000000000003161260131227100172140ustar00rootroot00000000000000package version // AUTO-GENERATED. DO NOT EDIT // 2015-08-14 09:56:50.742727493 -0400 EDT // VERSION is the generated version from /home/vbatts/src/vb/tar-split/version var VERSION = "v0.9.6-1-gc76e420"