pax_global_header00006660000000000000000000000064146753743340014532gustar00rootroot0000000000000052 comment=1f44cc4dc32346347cefbfe34c40166075385157 vbatts-tar-split-6881021/000077500000000000000000000000001467537433400151455ustar00rootroot00000000000000vbatts-tar-split-6881021/.github/000077500000000000000000000000001467537433400165055ustar00rootroot00000000000000vbatts-tar-split-6881021/.github/workflows/000077500000000000000000000000001467537433400205425ustar00rootroot00000000000000vbatts-tar-split-6881021/.github/workflows/go.yml000066400000000000000000000012451467537433400216740ustar00rootroot00000000000000name: build and vet on: pull_request: branches_ignore: [] jobs: build: runs-on: ubuntu-latest strategy: matrix: go: ['1.18', '1.19', '1.20', '1.21', '1.22'] name: build and vet steps: - uses: actions/checkout@v2 with: path: go/src/github.com/vbatts/tar-split - uses: actions/setup-go@v4 with: go-version: ${{ matrix.go }} - name: vet and build env: GOPATH: /home/runner/work/tar-split/tar-split/go run: | set -x export PATH=$GOPATH/bin:$PATH cd go/src/github.com/vbatts/tar-split go run mage.go -v vet build test vbatts-tar-split-6881021/.github/workflows/lint.yml000066400000000000000000000011421467537433400222310ustar00rootroot00000000000000name: lint on: pull_request: branches_ignore: [] jobs: lint: runs-on: ubuntu-latest strategy: matrix: go: ['1.20'] name: Linting steps: - uses: actions/checkout@v2 with: path: go/src/github.com/vbatts/tar-split - uses: actions/setup-go@v4 with: go-version: ${{ matrix.go }} - name: lint env: GOPATH: /home/runner/work/tar-split/tar-split/go run: | set -x export PATH=$GOPATH/bin:$PATH cd go/src/github.com/vbatts/tar-split go run mage.go -v lint vbatts-tar-split-6881021/LICENSE000066400000000000000000000027361467537433400161620ustar00rootroot00000000000000Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vbatts-tar-split-6881021/README.md000066400000000000000000000111321467537433400164220ustar00rootroot00000000000000# tar-split ![Build Status](https://github.com/vbatts/tar-split/actions/workflows/go.yml/badge.svg) ![Lint](https://github.com/vbatts/tar-split/actions/workflows/lint.yml/badge.svg) [![Go Report Card](https://goreportcard.com/badge/github.com/vbatts/tar-split)](https://goreportcard.com/report/github.com/vbatts/tar-split) Pristinely disassembling a tar archive, and stashing needed raw bytes and offsets to reassemble a validating original archive. 
## Docs Code API for libraries provided by `tar-split`: * [github.com/vbatts/tar-split/tar/asm](https://pkg.go.dev/github.com/vbatts/tar-split/tar/asm) * [github.com/vbatts/tar-split/tar/storage](https://pkg.go.dev/github.com/vbatts/tar-split/tar/storage) * [github.com/vbatts/tar-split/archive/tar](https://pkg.go.dev/github.com/vbatts/tar-split/archive/tar) ## Install The command line utility is installable via: ```bash go get github.com/vbatts/tar-split/cmd/tar-split ``` ## Usage For cli usage, see its [README.md](cmd/tar-split/README.md). For the library see the [docs](#docs) ## Demo ### Basic disassembly and assembly This demonstrates the `tar-split` command and how to assemble a tar archive from the `tar-data.json.gz` ![basic cmd demo thumbnail](https://i.ytimg.com/vi/vh5wyjIOBtc/2.jpg?time=1445027151805) [youtube video of basic command demo](https://youtu.be/vh5wyjIOBtc) ### Docker layer preservation This demonstrates the tar-split integration for docker-1.8. Providing consistent tar archives for the image layer content. ![docker tar-split demo](https://i.ytimg.com/vi_webp/vh5wyjIOBtc/default.webp) [youtube vide of docker layer checksums](https://youtu.be/tV_Dia8E8xw) ## Caveat Eventually this should detect TARs that this is not possible with. For example stored sparse files that have "holes" in them, will be read as a contiguous file, though the archive contents may be recorded in sparse format. Therefore when adding the file payload to a reassembled tar, to achieve identical output, the file payload would need be precisely re-sparsified. This is not something I seek to fix immediately, but would rather have an alert that precise reassembly is not possible. (see more http://www.gnu.org/software/tar/manual/html_node/Sparse-Formats.html) Other caveat, while tar archives support having multiple file entries for the same path, we will not support this feature. 
If there are more than one entries with the same path, expect an err (like `ErrDuplicatePath`) or a resulting tar stream that does not validate your original checksum/signature. ## Contract Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstream mergeable solution). ## Std Version The version of golang stdlib `archive/tar` is from go1.11 It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. ## Design See the [design](concept/DESIGN.md). ## Stored Metadata Since the raw bytes of the headers and padding are stored, you may be wondering what the size implications are. The headers are at least 512 bytes per file (sometimes more), at least 1024 null bytes on the end, and then various padding. This makes for a constant linear growth in the stored metadata, with a naive storage implementation. First we'll get an archive to work with. For repeatability, we'll make an archive from what you've just cloned: ```bash git archive --format=tar -o tar-split.tar HEAD . ``` ```bash $ go get github.com/vbatts/tar-split/cmd/tar-split $ tar-split checksize ./tar-split.tar inspecting "tar-split.tar" (size 210k) -- number of files: 50 -- size of metadata uncompressed: 53k -- size of gzip compressed metadata: 3k ``` So assuming you've managed the extraction of the archive yourself, for reuse of the file payloads from a relative path, then the only additional storage implications are as little as 3kb. But let's look at a larger archive, with many files. ```bash $ ls -sh ./d.tar 1.4G ./d.tar $ tar-split checksize ~/d.tar inspecting "/home/vbatts/d.tar" (size 1420749k) -- number of files: 38718 -- size of metadata uncompressed: 43261k -- size of gzip compressed metadata: 2251k ``` Here, an archive with 38,718 files has a compressed footprint of about 2mb. Rolling the null bytes on the end of the archive, we will assume a bytes-per-file rate for the storage implications. 
| uncompressed | compressed | | :----------: | :--------: | | ~ 1kb per/file | 0.06kb per/file | ## What's Next? * More implementations of storage Packer and Unpacker * More implementations of FileGetter and FilePutter * would be interesting to have an assembler stream that implements `io.Seeker` ## License See [LICENSE](LICENSE) vbatts-tar-split-6881021/archive/000077500000000000000000000000001467537433400165665ustar00rootroot00000000000000vbatts-tar-split-6881021/archive/tar/000077500000000000000000000000001467537433400173545ustar00rootroot00000000000000vbatts-tar-split-6881021/archive/tar/common.go000066400000000000000000000573471467537433400212130ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package tar implements access to tar archives. // // Tape archives (tar) are a file format for storing a sequence of files that // can be read and written in a streaming manner. // This package aims to cover most variations of the format, // including those produced by GNU and BSD tar tools. package tar import ( "errors" "fmt" "math" "os" "path" "reflect" "strconv" "strings" "time" ) // BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit // architectures. If a large value is encountered when decoding, the result // stored in Header will be the truncated version. 
var ( ErrHeader = errors.New("archive/tar: invalid tar header") ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") errMissData = errors.New("archive/tar: sparse file references non-existent data") errUnrefData = errors.New("archive/tar: sparse file contains unreferenced data") errWriteHole = errors.New("archive/tar: write non-NUL byte in sparse hole") ) type headerError []string func (he headerError) Error() string { const prefix = "archive/tar: cannot encode header" var ss []string for _, s := range he { if s != "" { ss = append(ss, s) } } if len(ss) == 0 { return prefix } return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and ")) } // Type flags for Header.Typeflag. const ( // Type '0' indicates a regular file. TypeReg = '0' TypeRegA = '\x00' // Deprecated: Use TypeReg instead. // Type '1' to '6' are header-only flags and may not have a data body. TypeLink = '1' // Hard link TypeSymlink = '2' // Symbolic link TypeChar = '3' // Character device node TypeBlock = '4' // Block device node TypeDir = '5' // Directory TypeFifo = '6' // FIFO node // Type '7' is reserved. TypeCont = '7' // Type 'x' is used by the PAX format to store key-value records that // are only relevant to the next file. // This package transparently handles these types. TypeXHeader = 'x' // Type 'g' is used by the PAX format to store key-value records that // are relevant to all subsequent files. // This package only supports parsing and composing such headers, // but does not currently support persisting the global state across files. TypeXGlobalHeader = 'g' // Type 'S' indicates a sparse file in the GNU format. TypeGNUSparse = 'S' // Types 'L' and 'K' are used by the GNU format for a meta file // used to store the path or link name for the next file. // This package transparently handles these types. 
TypeGNULongName = 'L' TypeGNULongLink = 'K' ) // Keywords for PAX extended header records. const ( paxNone = "" // Indicates that no PAX key is suitable paxPath = "path" paxLinkpath = "linkpath" paxSize = "size" paxUid = "uid" paxGid = "gid" paxUname = "uname" paxGname = "gname" paxMtime = "mtime" paxAtime = "atime" paxCtime = "ctime" // Removed from later revision of PAX spec, but was valid paxCharset = "charset" // Currently unused paxComment = "comment" // Currently unused paxSchilyXattr = "SCHILY.xattr." // Keywords for GNU sparse files in a PAX extended header. paxGNUSparse = "GNU.sparse." paxGNUSparseNumBlocks = "GNU.sparse.numblocks" paxGNUSparseOffset = "GNU.sparse.offset" paxGNUSparseNumBytes = "GNU.sparse.numbytes" paxGNUSparseMap = "GNU.sparse.map" paxGNUSparseName = "GNU.sparse.name" paxGNUSparseMajor = "GNU.sparse.major" paxGNUSparseMinor = "GNU.sparse.minor" paxGNUSparseSize = "GNU.sparse.size" paxGNUSparseRealSize = "GNU.sparse.realsize" ) // basicKeys is a set of the PAX keys for which we have built-in support. // This does not contain "charset" or "comment", which are both PAX-specific, // so adding them as first-class features of Header is unlikely. // Users can use the PAXRecords field to set it themselves. var basicKeys = map[string]bool{ paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true, paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true, } // A Header represents a single header in a tar archive. // Some fields may not be populated. // // For forward compatibility, users that retrieve a Header from Reader.Next, // mutate it in some ways, and then pass it back to Writer.WriteHeader // should do so by creating a new Header and copying the fields // that they are interested in preserving. type Header struct { // Typeflag is the type of header entry. // The zero value is automatically promoted to either TypeReg or TypeDir // depending on the presence of a trailing slash in Name. 
Typeflag byte Name string // Name of file entry Linkname string // Target name of link (valid for TypeLink or TypeSymlink) Size int64 // Logical file size in bytes Mode int64 // Permission and mode bits Uid int // User ID of owner Gid int // Group ID of owner Uname string // User name of owner Gname string // Group name of owner // If the Format is unspecified, then Writer.WriteHeader rounds ModTime // to the nearest second and ignores the AccessTime and ChangeTime fields. // // To use AccessTime or ChangeTime, specify the Format as PAX or GNU. // To use sub-second resolution, specify the Format as PAX. ModTime time.Time // Modification time AccessTime time.Time // Access time (requires either PAX or GNU support) ChangeTime time.Time // Change time (requires either PAX or GNU support) Devmajor int64 // Major device number (valid for TypeChar or TypeBlock) Devminor int64 // Minor device number (valid for TypeChar or TypeBlock) // Xattrs stores extended attributes as PAX records under the // "SCHILY.xattr." namespace. // // The following are semantically equivalent: // h.Xattrs[key] = value // h.PAXRecords["SCHILY.xattr."+key] = value // // When Writer.WriteHeader is called, the contents of Xattrs will take // precedence over those in PAXRecords. // // Deprecated: Use PAXRecords instead. Xattrs map[string]string // PAXRecords is a map of PAX extended header records. // // User-defined records should have keys of the following form: // VENDOR.keyword // Where VENDOR is some namespace in all uppercase, and keyword may // not contain the '=' character (e.g., "GOLANG.pkg.version"). // The key and value should be non-empty UTF-8 strings. // // When Writer.WriteHeader is called, PAX records derived from the // other fields in Header take precedence over PAXRecords. PAXRecords map[string]string // Format specifies the format of the tar header. // // This is set by Reader.Next as a best-effort guess at the format. 
// Since the Reader liberally reads some non-compliant files, // it is possible for this to be FormatUnknown. // // If the format is unspecified when Writer.WriteHeader is called, // then it uses the first format (in the order of USTAR, PAX, GNU) // capable of encoding this Header (see Format). Format Format } // sparseEntry represents a Length-sized fragment at Offset in the file. type sparseEntry struct{ Offset, Length int64 } func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length } // A sparse file can be represented as either a sparseDatas or a sparseHoles. // As long as the total size is known, they are equivalent and one can be // converted to the other form and back. The various tar formats with sparse // file support represent sparse files in the sparseDatas form. That is, they // specify the fragments in the file that has data, and treat everything else as // having zero bytes. As such, the encoding and decoding logic in this package // deals with sparseDatas. // // However, the external API uses sparseHoles instead of sparseDatas because the // zero value of sparseHoles logically represents a normal file (i.e., there are // no holes in it). On the other hand, the zero value of sparseDatas implies // that the file has no data in it, which is rather odd. 
// // As an example, if the underlying raw file contains the 10-byte data: // var compactFile = "abcdefgh" // // And the sparse map has the following entries: // var spd sparseDatas = []sparseEntry{ // {Offset: 2, Length: 5}, // Data fragment for 2..6 // {Offset: 18, Length: 3}, // Data fragment for 18..20 // } // var sph sparseHoles = []sparseEntry{ // {Offset: 0, Length: 2}, // Hole fragment for 0..1 // {Offset: 7, Length: 11}, // Hole fragment for 7..17 // {Offset: 21, Length: 4}, // Hole fragment for 21..24 // } // // Then the content of the resulting sparse file with a Header.Size of 25 is: // var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 type ( sparseDatas []sparseEntry sparseHoles []sparseEntry ) // validateSparseEntries reports whether sp is a valid sparse map. // It does not matter whether sp represents data fragments or hole fragments. func validateSparseEntries(sp []sparseEntry, size int64) bool { // Validate all sparse entries. These are the same checks as performed by // the BSD tar utility. if size < 0 { return false } var pre sparseEntry for _, cur := range sp { switch { case cur.Offset < 0 || cur.Length < 0: return false // Negative values are never okay case cur.Offset > math.MaxInt64-cur.Length: return false // Integer overflow with large length case cur.endOffset() > size: return false // Region extends beyond the actual size case pre.endOffset() > cur.Offset: return false // Regions cannot overlap and must be in order } pre = cur } return true } // alignSparseEntries mutates src and returns dst where each fragment's // starting offset is aligned up to the nearest block edge, and each // ending offset is aligned down to the nearest block edge. // // Even though the Go tar Reader and the BSD tar utility can handle entries // with arbitrary offsets and lengths, the GNU tar utility can only handle // offsets and lengths that are multiples of blockSize. 
func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry { dst := src[:0] for _, s := range src { pos, end := s.Offset, s.endOffset() pos += blockPadding(+pos) // Round-up to nearest blockSize if end != size { end -= blockPadding(-end) // Round-down to nearest blockSize } if pos < end { dst = append(dst, sparseEntry{Offset: pos, Length: end - pos}) } } return dst } // invertSparseEntries converts a sparse map from one form to the other. // If the input is sparseHoles, then it will output sparseDatas and vice-versa. // The input must have been already validated. // // This function mutates src and returns a normalized map where: // * adjacent fragments are coalesced together // * only the last fragment may be empty // * the endOffset of the last fragment is the total size func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry { dst := src[:0] var pre sparseEntry for _, cur := range src { if cur.Length == 0 { continue // Skip empty fragments } pre.Length = cur.Offset - pre.Offset if pre.Length > 0 { dst = append(dst, pre) // Only add non-empty fragments } pre.Offset = cur.endOffset() } pre.Length = size - pre.Offset // Possibly the only empty fragment return append(dst, pre) } // fileState tracks the number of logical (includes sparse holes) and physical // (actual in tar archive) bytes remaining for the current file. // // Invariant: LogicalRemaining >= PhysicalRemaining type fileState interface { LogicalRemaining() int64 PhysicalRemaining() int64 } // allowedFormats determines which formats can be used. // The value returned is the logical OR of multiple possible formats. // If the value is FormatUnknown, then the input Header cannot be encoded // and an error is returned explaining why. // // As a by-product of checking the fields, this function returns paxHdrs, which // contain all fields that could not be directly encoded. // A value receiver ensures that this method does not mutate the source Header. 
func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) { format = FormatUSTAR | FormatPAX | FormatGNU paxHdrs = make(map[string]string) var whyNoUSTAR, whyNoPAX, whyNoGNU string var preferPAX bool // Prefer PAX over USTAR verifyString := func(s string, size int, name, paxKey string) { // NUL-terminator is optional for path and linkpath. // Technically, it is required for uname and gname, // but neither GNU nor BSD tar checks for it. tooLong := len(s) > size allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath if hasNUL(s) || (tooLong && !allowLongGNU) { whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s) format.mustNotBe(FormatGNU) } if !isASCII(s) || tooLong { canSplitUSTAR := paxKey == paxPath if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok { whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s) format.mustNotBe(FormatUSTAR) } if paxKey == paxNone { whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s) format.mustNotBe(FormatPAX) } else { paxHdrs[paxKey] = s } } if v, ok := h.PAXRecords[paxKey]; ok && v == s { paxHdrs[paxKey] = v } } verifyNumeric := func(n int64, size int, name, paxKey string) { if !fitsInBase256(size, n) { whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n) format.mustNotBe(FormatGNU) } if !fitsInOctal(size, n) { whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n) format.mustNotBe(FormatUSTAR) if paxKey == paxNone { whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n) format.mustNotBe(FormatPAX) } else { paxHdrs[paxKey] = strconv.FormatInt(n, 10) } } if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) { paxHdrs[paxKey] = v } } verifyTime := func(ts time.Time, size int, name, paxKey string) { if ts.IsZero() { return // Always okay } if !fitsInBase256(size, ts.Unix()) { whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts) format.mustNotBe(FormatGNU) } isMtime := paxKey == paxMtime fitsOctal := fitsInOctal(size, ts.Unix()) if 
(isMtime && !fitsOctal) || !isMtime { whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts) format.mustNotBe(FormatUSTAR) } needsNano := ts.Nanosecond() != 0 if !isMtime || !fitsOctal || needsNano { preferPAX = true // USTAR may truncate sub-second measurements if paxKey == paxNone { whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts) format.mustNotBe(FormatPAX) } else { paxHdrs[paxKey] = formatPAXTime(ts) } } if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) { paxHdrs[paxKey] = v } } // Check basic fields. var blk block v7 := blk.V7() ustar := blk.USTAR() gnu := blk.GNU() verifyString(h.Name, len(v7.Name()), "Name", paxPath) verifyString(h.Linkname, len(v7.LinkName()), "Linkname", paxLinkpath) verifyString(h.Uname, len(ustar.UserName()), "Uname", paxUname) verifyString(h.Gname, len(ustar.GroupName()), "Gname", paxGname) verifyNumeric(h.Mode, len(v7.Mode()), "Mode", paxNone) verifyNumeric(int64(h.Uid), len(v7.UID()), "Uid", paxUid) verifyNumeric(int64(h.Gid), len(v7.GID()), "Gid", paxGid) verifyNumeric(h.Size, len(v7.Size()), "Size", paxSize) verifyNumeric(h.Devmajor, len(ustar.DevMajor()), "Devmajor", paxNone) verifyNumeric(h.Devminor, len(ustar.DevMinor()), "Devminor", paxNone) verifyTime(h.ModTime, len(v7.ModTime()), "ModTime", paxMtime) verifyTime(h.AccessTime, len(gnu.AccessTime()), "AccessTime", paxAtime) verifyTime(h.ChangeTime, len(gnu.ChangeTime()), "ChangeTime", paxCtime) // Check for header-only types. var whyOnlyPAX, whyOnlyGNU string switch h.Typeflag { case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse: // Exclude TypeLink and TypeSymlink, since they may reference directories. 
if strings.HasSuffix(h.Name, "/") { return FormatUnknown, nil, headerError{"filename may not have trailing slash"} } case TypeXHeader, TypeGNULongName, TypeGNULongLink: return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"} case TypeXGlobalHeader: h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format} if !reflect.DeepEqual(h, h2) { return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"} } whyOnlyPAX = "only PAX supports TypeXGlobalHeader" format.mayOnlyBe(FormatPAX) } if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { return FormatUnknown, nil, headerError{"negative size on header-only type"} } // Check PAX records. if len(h.Xattrs) > 0 { for k, v := range h.Xattrs { paxHdrs[paxSchilyXattr+k] = v } whyOnlyPAX = "only PAX supports Xattrs" format.mayOnlyBe(FormatPAX) } if len(h.PAXRecords) > 0 { for k, v := range h.PAXRecords { switch _, exists := paxHdrs[k]; { case exists: continue // Do not overwrite existing records case h.Typeflag == TypeXGlobalHeader: paxHdrs[k] = v // Copy all records case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse): paxHdrs[k] = v // Ignore local records that may conflict } } whyOnlyPAX = "only PAX supports PAXRecords" format.mayOnlyBe(FormatPAX) } for k, v := range paxHdrs { if !validPAXRecord(k, v) { return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)} } } // TODO(dsnet): Re-enable this when adding sparse support. // See https://golang.org/issue/22735 /* // Check sparse files. 
if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { if isHeaderOnlyType(h.Typeflag) { return FormatUnknown, nil, headerError{"header-only type cannot be sparse"} } if !validateSparseEntries(h.SparseHoles, h.Size) { return FormatUnknown, nil, headerError{"invalid sparse holes"} } if h.Typeflag == TypeGNUSparse { whyOnlyGNU = "only GNU supports TypeGNUSparse" format.mayOnlyBe(FormatGNU) } else { whyNoGNU = "GNU supports sparse files only with TypeGNUSparse" format.mustNotBe(FormatGNU) } whyNoUSTAR = "USTAR does not support sparse files" format.mustNotBe(FormatUSTAR) } */ // Check desired format. if wantFormat := h.Format; wantFormat != FormatUnknown { if wantFormat.has(FormatPAX) && !preferPAX { wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too } format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted } if format == FormatUnknown { switch h.Format { case FormatUSTAR: err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU} case FormatPAX: err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU} case FormatGNU: err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX} default: err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU} } } return format, paxHdrs, err } // FileInfo returns an os.FileInfo for the Header. func (h *Header) FileInfo() os.FileInfo { return headerFileInfo{h} } // headerFileInfo implements os.FileInfo. type headerFileInfo struct { h *Header } func (fi headerFileInfo) Size() int64 { return fi.h.Size } func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } func (fi headerFileInfo) Sys() interface{} { return fi.h } // Name returns the base name of the file. func (fi headerFileInfo) Name() string { if fi.IsDir() { return path.Base(path.Clean(fi.h.Name)) } return path.Base(fi.h.Name) } // Mode returns the permission and mode bits for the headerFileInfo. 
func (fi headerFileInfo) Mode() (mode os.FileMode) { // Set file permission bits. mode = os.FileMode(fi.h.Mode).Perm() // Set setuid, setgid and sticky bits. if fi.h.Mode&c_ISUID != 0 { mode |= os.ModeSetuid } if fi.h.Mode&c_ISGID != 0 { mode |= os.ModeSetgid } if fi.h.Mode&c_ISVTX != 0 { mode |= os.ModeSticky } // Set file mode bits; clear perm, setuid, setgid, and sticky bits. switch m := os.FileMode(fi.h.Mode) &^ 07777; m { case c_ISDIR: mode |= os.ModeDir case c_ISFIFO: mode |= os.ModeNamedPipe case c_ISLNK: mode |= os.ModeSymlink case c_ISBLK: mode |= os.ModeDevice case c_ISCHR: mode |= os.ModeDevice mode |= os.ModeCharDevice case c_ISSOCK: mode |= os.ModeSocket } switch fi.h.Typeflag { case TypeSymlink: mode |= os.ModeSymlink case TypeChar: mode |= os.ModeDevice mode |= os.ModeCharDevice case TypeBlock: mode |= os.ModeDevice case TypeDir: mode |= os.ModeDir case TypeFifo: mode |= os.ModeNamedPipe } return mode } // sysStat, if non-nil, populates h from system-dependent fields of fi. var sysStat func(fi os.FileInfo, h *Header) error const ( // Mode constants from the USTAR spec: // See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 c_ISUID = 04000 // Set uid c_ISGID = 02000 // Set gid c_ISVTX = 01000 // Save text (sticky bit) // Common Unix mode constants; these are not defined in any common tar standard. // Header.FileInfo understands these, but FileInfoHeader will never produce these. c_ISDIR = 040000 // Directory c_ISFIFO = 010000 // FIFO c_ISREG = 0100000 // Regular file c_ISLNK = 0120000 // Symbolic link c_ISBLK = 060000 // Block special file c_ISCHR = 020000 // Character special file c_ISSOCK = 0140000 // Socket ) // FileInfoHeader creates a partially-populated Header from fi. // If fi describes a symlink, FileInfoHeader records link as the link target. // If fi describes a directory, a slash is appended to the name. 
// // Since os.FileInfo's Name method only returns the base name of // the file it describes, it may be necessary to modify Header.Name // to provide the full path name of the file. func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { if fi == nil { return nil, errors.New("archive/tar: FileInfo is nil") } fm := fi.Mode() h := &Header{ Name: fi.Name(), ModTime: fi.ModTime(), Mode: int64(fm.Perm()), // or'd with c_IS* constants later } switch { case fm.IsRegular(): h.Typeflag = TypeReg h.Size = fi.Size() case fi.IsDir(): h.Typeflag = TypeDir h.Name += "/" case fm&os.ModeSymlink != 0: h.Typeflag = TypeSymlink h.Linkname = link case fm&os.ModeDevice != 0: if fm&os.ModeCharDevice != 0 { h.Typeflag = TypeChar } else { h.Typeflag = TypeBlock } case fm&os.ModeNamedPipe != 0: h.Typeflag = TypeFifo case fm&os.ModeSocket != 0: return nil, fmt.Errorf("archive/tar: sockets not supported") default: return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) } if fm&os.ModeSetuid != 0 { h.Mode |= c_ISUID } if fm&os.ModeSetgid != 0 { h.Mode |= c_ISGID } if fm&os.ModeSticky != 0 { h.Mode |= c_ISVTX } // If possible, populate additional fields from OS-specific // FileInfo fields. if sys, ok := fi.Sys().(*Header); ok { // This FileInfo came from a Header (not the OS). Use the // original Header to populate all remaining fields. 
h.Uid = sys.Uid h.Gid = sys.Gid h.Uname = sys.Uname h.Gname = sys.Gname h.AccessTime = sys.AccessTime h.ChangeTime = sys.ChangeTime if sys.Xattrs != nil { h.Xattrs = make(map[string]string) for k, v := range sys.Xattrs { h.Xattrs[k] = v } } if sys.Typeflag == TypeLink { // hard link h.Typeflag = TypeLink h.Size = 0 h.Linkname = sys.Linkname } if sys.PAXRecords != nil { h.PAXRecords = make(map[string]string) for k, v := range sys.PAXRecords { h.PAXRecords[k] = v } } } if sysStat != nil { return h, sysStat(fi, h) } return h, nil } // isHeaderOnlyType checks if the given type flag is of the type that has no // data section even if a size is specified. func isHeaderOnlyType(flag byte) bool { switch flag { case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: return true default: return false } } func min(a, b int64) int64 { if a < b { return a } return b } vbatts-tar-split-6881021/archive/tar/example_test.go000066400000000000000000000027041467537433400224000ustar00rootroot00000000000000// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar_test import ( "archive/tar" "bytes" "fmt" "io" "log" "os" ) func Example_minimal() { // Create and add some files to the archive. var buf bytes.Buffer tw := tar.NewWriter(&buf) var files = []struct { Name, Body string }{ {"readme.txt", "This archive contains some text files."}, {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, {"todo.txt", "Get animal handling license."}, } for _, file := range files { hdr := &tar.Header{ Name: file.Name, Mode: 0600, Size: int64(len(file.Body)), } if err := tw.WriteHeader(hdr); err != nil { log.Fatal(err) } if _, err := tw.Write([]byte(file.Body)); err != nil { log.Fatal(err) } } if err := tw.Close(); err != nil { log.Fatal(err) } // Open and iterate through the files in the archive. 
tr := tar.NewReader(&buf) for { hdr, err := tr.Next() if err == io.EOF { break // End of archive } if err != nil { log.Fatal(err) } fmt.Printf("Contents of %s:\n", hdr.Name) if _, err := io.Copy(os.Stdout, tr); err != nil { log.Fatal(err) } fmt.Println() } // Output: // Contents of readme.txt: // This archive contains some text files. // Contents of gopher.txt: // Gopher names: // George // Geoffrey // Gonzo // Contents of todo.txt: // Get animal handling license. } vbatts-tar-split-6881021/archive/tar/format.go000066400000000000000000000261431467537433400212010ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar import "strings" // Format represents the tar archive format. // // The original tar format was introduced in Unix V7. // Since then, there have been multiple competing formats attempting to // standardize or extend the V7 format to overcome its limitations. // The most common formats are the USTAR, PAX, and GNU formats, // each with their own advantages and limitations. 
// // The following table captures the capabilities of each format: // // | USTAR | PAX | GNU // ------------------+--------+-----------+---------- // Name | 256B | unlimited | unlimited // Linkname | 100B | unlimited | unlimited // Size | uint33 | unlimited | uint89 // Mode | uint21 | uint21 | uint57 // Uid/Gid | uint21 | unlimited | uint57 // Uname/Gname | 32B | unlimited | 32B // ModTime | uint33 | unlimited | int89 // AccessTime | n/a | unlimited | int89 // ChangeTime | n/a | unlimited | int89 // Devmajor/Devminor | uint21 | uint21 | uint57 // ------------------+--------+-----------+---------- // string encoding | ASCII | UTF-8 | binary // sub-second times | no | yes | no // sparse files | no | yes | yes // // The table's upper portion shows the Header fields, where each format reports // the maximum number of bytes allowed for each string field and // the integer type used to store each numeric field // (where timestamps are stored as the number of seconds since the Unix epoch). // // The table's lower portion shows specialized features of each format, // such as supported string encodings, support for sub-second timestamps, // or support for sparse files. // // The Writer currently provides no support for sparse files. type Format int // Constants to identify various tar formats. const ( // Deliberately hide the meaning of constants from public API. _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... // FormatUnknown indicates that the format is unknown. FormatUnknown // The format of the original Unix V7 tar tool prior to standardization. formatV7 // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. // // While this format is compatible with most tar readers, // the format has several limitations making it unsuitable for some usages. // Most notably, it cannot support sparse files, files larger than 8GiB, // filenames larger than 256 characters, and non-ASCII filenames. 
// // Reference: // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 FormatUSTAR // FormatPAX represents the PAX header format defined in POSIX.1-2001. // // PAX extends USTAR by writing a special file with Typeflag TypeXHeader // preceding the original header. This file contains a set of key-value // records, which are used to overcome USTAR's shortcomings, in addition to // providing the ability to have sub-second resolution for timestamps. // // Some newer formats add their own extensions to PAX by defining their // own keys and assigning certain semantic meaning to the associated values. // For example, sparse file support in PAX is implemented using keys // defined by the GNU manual (e.g., "GNU.sparse.map"). // // Reference: // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html FormatPAX // FormatGNU represents the GNU header format. // // The GNU header format is older than the USTAR and PAX standards and // is not compatible with them. The GNU format supports // arbitrary file sizes, filenames of arbitrary encoding and length, // sparse files, and other features. // // It is recommended that PAX be chosen over GNU unless the target // application can only parse GNU formatted archives. // // Reference: // https://www.gnu.org/software/tar/manual/html_node/Standard.html FormatGNU // Schily's tar format, which is incompatible with USTAR. // This does not cover STAR extensions to the PAX format; these fall under // the PAX format. 
formatSTAR formatMax ) func (f Format) has(f2 Format) bool { return f&f2 != 0 } func (f *Format) mayBe(f2 Format) { *f |= f2 } func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } var formatNames = map[Format]string{ formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", } func (f Format) String() string { var ss []string for f2 := Format(1); f2 < formatMax; f2 <<= 1 { if f.has(f2) { ss = append(ss, formatNames[f2]) } } switch len(ss) { case 0: return "" case 1: return ss[0] default: return "(" + strings.Join(ss, " | ") + ")" } } // Magics used to identify various formats. const ( magicGNU, versionGNU = "ustar ", " \x00" magicUSTAR, versionUSTAR = "ustar\x00", "00" trailerSTAR = "tar\x00" ) // Size constants from various tar specifications. const ( blockSize = 512 // Size of each block in a tar stream nameSize = 100 // Max length of the name field in USTAR format prefixSize = 155 // Max length of the prefix field in USTAR format ) // blockPadding computes the number of bytes needed to pad offset up to the // nearest block edge where 0 <= n < blockSize. func blockPadding(offset int64) (n int64) { return -offset & (blockSize - 1) } var zeroBlock block type block [blockSize]byte // Convert block to any number of formats. func (b *block) V7() *headerV7 { return (*headerV7)(b) } func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } // GetFormat checks that the block is a valid tar header based on the checksum. // It then attempts to guess the specific format based on magic values. // If the checksum fails, then FormatUnknown is returned. func (b *block) GetFormat() Format { // Verify checksum. 
var p parser value := p.parseOctal(b.V7().Chksum()) chksum1, chksum2 := b.ComputeChecksum() if p.err != nil || (value != chksum1 && value != chksum2) { return FormatUnknown } // Guess the magic values. magic := string(b.USTAR().Magic()) version := string(b.USTAR().Version()) trailer := string(b.STAR().Trailer()) switch { case magic == magicUSTAR && trailer == trailerSTAR: return formatSTAR case magic == magicUSTAR: return FormatUSTAR | FormatPAX case magic == magicGNU && version == versionGNU: return FormatGNU default: return formatV7 } } // SetFormat writes the magic values necessary for specified format // and then updates the checksum accordingly. func (b *block) SetFormat(format Format) { // Set the magic values. switch { case format.has(formatV7): // Do nothing. case format.has(FormatGNU): copy(b.GNU().Magic(), magicGNU) copy(b.GNU().Version(), versionGNU) case format.has(formatSTAR): copy(b.STAR().Magic(), magicUSTAR) copy(b.STAR().Version(), versionUSTAR) copy(b.STAR().Trailer(), trailerSTAR) case format.has(FormatUSTAR | FormatPAX): copy(b.USTAR().Magic(), magicUSTAR) copy(b.USTAR().Version(), versionUSTAR) default: panic("invalid format") } // Update checksum. // This field is special in that it is terminated by a NULL then space. var f formatter field := b.V7().Chksum() chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 field[7] = ' ' } // ComputeChecksum computes the checksum for the header block. // POSIX specifies a sum of the unsigned byte values, but the Sun tar used // signed byte values. // We compute and return both. func (b *block) ComputeChecksum() (unsigned, signed int64) { for i, c := range b { if 148 <= i && i < 156 { c = ' ' // Treat the checksum field itself as all spaces. } unsigned += int64(c) signed += int64(int8(c)) } return unsigned, signed } // Reset clears the block with all zeros. 
func (b *block) Reset() { *b = block{} } type headerV7 [blockSize]byte func (h *headerV7) Name() []byte { return h[000:][:100] } func (h *headerV7) Mode() []byte { return h[100:][:8] } func (h *headerV7) UID() []byte { return h[108:][:8] } func (h *headerV7) GID() []byte { return h[116:][:8] } func (h *headerV7) Size() []byte { return h[124:][:12] } func (h *headerV7) ModTime() []byte { return h[136:][:12] } func (h *headerV7) Chksum() []byte { return h[148:][:8] } func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } func (h *headerV7) LinkName() []byte { return h[157:][:100] } type headerGNU [blockSize]byte func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } func (h *headerGNU) Magic() []byte { return h[257:][:6] } func (h *headerGNU) Version() []byte { return h[263:][:2] } func (h *headerGNU) UserName() []byte { return h[265:][:32] } func (h *headerGNU) GroupName() []byte { return h[297:][:32] } func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } func (h *headerGNU) AccessTime() []byte { return h[345:][:12] } func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) } func (h *headerGNU) RealSize() []byte { return h[483:][:12] } type headerSTAR [blockSize]byte func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } func (h *headerSTAR) Magic() []byte { return h[257:][:6] } func (h *headerSTAR) Version() []byte { return h[263:][:2] } func (h *headerSTAR) UserName() []byte { return h[265:][:32] } func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } func (h *headerSTAR) Prefix() []byte { return h[345:][:131] } func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } func (h 
*headerSTAR) Trailer() []byte { return h[508:][:4] } type headerUSTAR [blockSize]byte func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } func (h *headerUSTAR) Version() []byte { return h[263:][:2] } func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } type sparseArray []byte func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) } func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } func (s sparseArray) MaxEntries() int { return len(s) / 24 } type sparseElem []byte func (s sparseElem) Offset() []byte { return s[00:][:12] } func (s sparseElem) Length() []byte { return s[12:][:12] } vbatts-tar-split-6881021/archive/tar/reader.go000066400000000000000000000666511467537433400211630ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar import ( "bytes" "io" "strconv" "strings" "time" ) // Reader provides sequential access to the contents of a tar archive. // Reader.Next advances to the next file in the archive (including the first), // and then Reader can be treated as an io.Reader to access the file's data. type Reader struct { r io.Reader pad int64 // Amount of padding (ignored) after current file entry curr fileReader // Reader for current file entry blk block // Buffer to use as temporary local storage // err is a persistent error. // It is only the responsibility of every exported method of Reader to // ensure that this error is sticky. err error RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. 
Some performance/memory hit for this. rawBytes *bytes.Buffer // last raw bits } type fileReader interface { io.Reader fileState WriteTo(io.Writer) (int64, error) } // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. // // # This call resets the current rawbytes buffer // // Only when RawAccounting is enabled, otherwise this returns nil func (tr *Reader) RawBytes() []byte { if !tr.RawAccounting { return nil } if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) } defer tr.rawBytes.Reset() // if we've read them, then flush them. return tr.rawBytes.Bytes() } // ExpectedPadding returns the number of bytes of padding expected after the last header returned by Next() func (tr *Reader) ExpectedPadding() int64 { return tr.pad } // NewReader creates a new Reader reading from r. func NewReader(r io.Reader) *Reader { return &Reader{r: r, curr: ®FileReader{r, 0}} } // Next advances to the next entry in the tar archive. // The Header.Size determines how many bytes can be read for the next file. // Any remaining data in the current file is automatically discarded. // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { if tr.err != nil { return nil, tr.err } hdr, err := tr.next() tr.err = err return hdr, err } func (tr *Reader) next() (*Header, error) { var paxHdrs map[string]string var gnuLongName, gnuLongLink string if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) } else { tr.rawBytes.Reset() } } // Externally, Next iterates through the tar archive as if it is a series of // files. Internally, the tar format often uses fake "files" to add meta // data that describes the next file. These meta data "files" should not // normally be visible to the outside. As such, this loop iterates through // one or more "header files" until it finds a "normal file". 
format := FormatUSTAR | FormatPAX | FormatGNU for { // Discard the remainder of the file and any padding. if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil { return nil, err } n, err := tryReadFull(tr.r, tr.blk[:tr.pad]) if err != nil { return nil, err } if tr.RawAccounting { tr.rawBytes.Write(tr.blk[:n]) } tr.pad = 0 hdr, rawHdr, err := tr.readHeader() if err != nil { return nil, err } if err := tr.handleRegularFile(hdr); err != nil { return nil, err } format.mayOnlyBe(hdr.Format) // Check for PAX/GNU special headers and files. switch hdr.Typeflag { case TypeXHeader, TypeXGlobalHeader: format.mayOnlyBe(FormatPAX) paxHdrs, err = parsePAX(tr) if err != nil { return nil, err } if hdr.Typeflag == TypeXGlobalHeader { if err = mergePAX(hdr, paxHdrs); err != nil { return nil, err } return &Header{ Name: hdr.Name, Typeflag: hdr.Typeflag, Xattrs: hdr.Xattrs, PAXRecords: hdr.PAXRecords, Format: format, }, nil } continue // This is a meta header affecting the next header case TypeGNULongName, TypeGNULongLink: format.mayOnlyBe(FormatGNU) realname, err := io.ReadAll(tr) if err != nil { return nil, err } if tr.RawAccounting { tr.rawBytes.Write(realname) } var p parser switch hdr.Typeflag { case TypeGNULongName: gnuLongName = p.parseString(realname) case TypeGNULongLink: gnuLongLink = p.parseString(realname) } continue // This is a meta header affecting the next header default: // The old GNU sparse format is handled here since it is technically // just a regular file with additional attributes. if err := mergePAX(hdr, paxHdrs); err != nil { return nil, err } if gnuLongName != "" { hdr.Name = gnuLongName } if gnuLongLink != "" { hdr.Linkname = gnuLongLink } if hdr.Typeflag == TypeRegA { if strings.HasSuffix(hdr.Name, "/") { hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories } else { hdr.Typeflag = TypeReg } } // The extended headers may have updated the size. // Thus, setup the regFileReader again after merging PAX headers. 
if err := tr.handleRegularFile(hdr); err != nil { return nil, err } // Sparse formats rely on being able to read from the logical data // section; there must be a preceding call to handleRegularFile. if err := tr.handleSparseFile(hdr, rawHdr); err != nil { return nil, err } // Set the final guess at the format. if format.has(FormatUSTAR) && format.has(FormatPAX) { format.mayOnlyBe(FormatUSTAR) } hdr.Format = format return hdr, nil // This is a file, so stop } } } // handleRegularFile sets up the current file reader and padding such that it // can only read the following logical data section. It will properly handle // special headers that contain no data section. func (tr *Reader) handleRegularFile(hdr *Header) error { nb := hdr.Size if isHeaderOnlyType(hdr.Typeflag) { nb = 0 } if nb < 0 { return ErrHeader } tr.pad = blockPadding(nb) tr.curr = ®FileReader{r: tr.r, nb: nb} return nil } // handleSparseFile checks if the current file is a sparse format of any type // and sets the curr reader appropriately. func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error { var spd sparseDatas var err error if hdr.Typeflag == TypeGNUSparse { spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) } else { spd, err = tr.readGNUSparsePAXHeaders(hdr) } // If sp is non-nil, then this is a sparse file. // Note that it is possible for len(sp) == 0. if err == nil && spd != nil { if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { return ErrHeader } sph := invertSparseEntries(spd, hdr.Size) tr.curr = &sparseFileReader{tr.curr, sph, 0} } return err } // readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. // If they are found, then this function reads the sparse map and returns it. // This assumes that 0.0 headers have already been converted to 0.1 headers // by the PAX header parsing logic. func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { // Identify the version of GNU headers. 
var is1x0 bool major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor] switch { case major == "0" && (minor == "0" || minor == "1"): is1x0 = false case major == "1" && minor == "0": is1x0 = true case major != "" || minor != "": return nil, nil // Unknown GNU sparse PAX version case hdr.PAXRecords[paxGNUSparseMap] != "": is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess default: return nil, nil // Not a PAX format GNU sparse file. } hdr.Format.mayOnlyBe(FormatPAX) // Update hdr from GNU sparse PAX headers. if name := hdr.PAXRecords[paxGNUSparseName]; name != "" { hdr.Name = name } size := hdr.PAXRecords[paxGNUSparseSize] if size == "" { size = hdr.PAXRecords[paxGNUSparseRealSize] } if size != "" { n, err := strconv.ParseInt(size, 10, 64) if err != nil { return nil, ErrHeader } hdr.Size = n } // Read the sparse map according to the appropriate format. if is1x0 { return readGNUSparseMap1x0(tr.curr) } return readGNUSparseMap0x1(hdr.PAXRecords) } // mergePAX merges paxHdrs into hdr for all relevant fields of Header. 
func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) { for k, v := range paxHdrs { if v == "" { continue // Keep the original USTAR value } var id64 int64 switch k { case paxPath: hdr.Name = v case paxLinkpath: hdr.Linkname = v case paxUname: hdr.Uname = v case paxGname: hdr.Gname = v case paxUid: id64, err = strconv.ParseInt(v, 10, 64) hdr.Uid = int(id64) // Integer overflow possible case paxGid: id64, err = strconv.ParseInt(v, 10, 64) hdr.Gid = int(id64) // Integer overflow possible case paxAtime: hdr.AccessTime, err = parsePAXTime(v) case paxMtime: hdr.ModTime, err = parsePAXTime(v) case paxCtime: hdr.ChangeTime, err = parsePAXTime(v) case paxSize: hdr.Size, err = strconv.ParseInt(v, 10, 64) default: if strings.HasPrefix(k, paxSchilyXattr) { if hdr.Xattrs == nil { hdr.Xattrs = make(map[string]string) } hdr.Xattrs[k[len(paxSchilyXattr):]] = v } } if err != nil { return ErrHeader } } hdr.PAXRecords = paxHdrs return nil } // parsePAX parses PAX headers. // If an extended header (type 'x') is invalid, ErrHeader is returned func parsePAX(r io.Reader) (map[string]string, error) { buf, err := io.ReadAll(r) if err != nil { return nil, err } // leaving this function for io.Reader makes it more testable if tr, ok := r.(*Reader); ok && tr.RawAccounting { if _, err = tr.rawBytes.Write(buf); err != nil { return nil, err } } sbuf := string(buf) // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into format 0.1 // headers since 0.0 headers were not PAX compliant. var sparseMap []string paxHdrs := make(map[string]string) for len(sbuf) > 0 { key, value, residual, err := parsePAXRecord(sbuf) if err != nil { return nil, ErrHeader } sbuf = residual switch key { case paxGNUSparseOffset, paxGNUSparseNumBytes: // Validate sparse header order and value. 
if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || strings.Contains(value, ",") { return nil, ErrHeader } sparseMap = append(sparseMap, value) default: paxHdrs[key] = value } } if len(sparseMap) > 0 { paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") } return paxHdrs, nil } // readHeader reads the next block header and assumes that the underlying reader // is already aligned to a block boundary. It returns the raw block of the // header in case further processing is required. // // The err will be set to io.EOF only when one of the following occurs: // - Exactly 0 bytes are read and EOF is hit. // - Exactly 1 block of zeros is read and EOF is hit. // - At least 2 blocks of zeros are read. func (tr *Reader) readHeader() (*Header, *block, error) { // Two blocks of zero bytes marks the end of the archive. n, err := io.ReadFull(tr.r, tr.blk[:]) if tr.RawAccounting && (err == nil || err == io.EOF) { tr.rawBytes.Write(tr.blk[:n]) } if err != nil { return nil, nil, err // EOF is okay here; exactly 0 bytes read } if bytes.Equal(tr.blk[:], zeroBlock[:]) { n, err = io.ReadFull(tr.r, tr.blk[:]) if tr.RawAccounting && (err == nil || err == io.EOF) { tr.rawBytes.Write(tr.blk[:n]) } if err != nil { return nil, nil, err // EOF is okay here; exactly 1 block of zeros read } if bytes.Equal(tr.blk[:], zeroBlock[:]) { return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read } return nil, nil, ErrHeader // Zero block and then non-zero block } // Verify the header matches a known format. format := tr.blk.GetFormat() if format == FormatUnknown { return nil, nil, ErrHeader } var p parser hdr := new(Header) // Unpack the V7 header. 
v7 := tr.blk.V7() hdr.Typeflag = v7.TypeFlag()[0] hdr.Name = p.parseString(v7.Name()) hdr.Linkname = p.parseString(v7.LinkName()) hdr.Size = p.parseNumeric(v7.Size()) hdr.Mode = p.parseNumeric(v7.Mode()) hdr.Uid = int(p.parseNumeric(v7.UID())) hdr.Gid = int(p.parseNumeric(v7.GID())) hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) // Unpack format specific fields. if format > formatV7 { ustar := tr.blk.USTAR() hdr.Uname = p.parseString(ustar.UserName()) hdr.Gname = p.parseString(ustar.GroupName()) hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) hdr.Devminor = p.parseNumeric(ustar.DevMinor()) var prefix string switch { case format.has(FormatUSTAR | FormatPAX): hdr.Format = format ustar := tr.blk.USTAR() prefix = p.parseString(ustar.Prefix()) // For Format detection, check if block is properly formatted since // the parser is more liberal than what USTAR actually permits. notASCII := func(r rune) bool { return r >= 0x80 } if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 { hdr.Format = FormatUnknown // Non-ASCII characters in block. } nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 } if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) && nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) { hdr.Format = FormatUnknown // Numeric fields must end in NUL } case format.has(formatSTAR): star := tr.blk.STAR() prefix = p.parseString(star.Prefix()) hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) case format.has(FormatGNU): hdr.Format = format var p2 parser gnu := tr.blk.GNU() if b := gnu.AccessTime(); b[0] != 0 { hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) } if b := gnu.ChangeTime(); b[0] != 0 { hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) } // Prior to Go1.8, the Writer had a bug where it would output // an invalid tar file in certain rare situations because the logic // incorrectly believed that the old GNU format had a prefix field. 
// This is wrong and leads to an output file that mangles the // atime and ctime fields, which are often left unused. // // In order to continue reading tar files created by former, buggy // versions of Go, we skeptically parse the atime and ctime fields. // If we are unable to parse them and the prefix field looks like // an ASCII string, then we fallback on the pre-Go1.8 behavior // of treating these fields as the USTAR prefix field. // // Note that this will not use the fallback logic for all possible // files generated by a pre-Go1.8 toolchain. If the generated file // happened to have a prefix field that parses as valid // atime and ctime fields (e.g., when they are valid octal strings), // then it is impossible to distinguish between an valid GNU file // and an invalid pre-Go1.8 file. // // See https://golang.org/issues/12594 // See https://golang.org/issues/21005 if p2.err != nil { hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} ustar := tr.blk.USTAR() if s := p.parseString(ustar.Prefix()); isASCII(s) { prefix = s } hdr.Format = FormatUnknown // Buggy file is not GNU } } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } return hdr, &tr.blk, p.err } // readOldGNUSparseMap reads the sparse map from the old GNU sparse format. // The sparse map is stored in the tar header if it's small enough. // If it's larger than four entries, then one or more extension headers are used // to store the rest of the sparse map. // // The Header.Size does not reflect the size of any extended headers used. // Thus, this function will read from the raw io.Reader to fetch extra headers. // This method mutates blk in the process. func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { // Make sure that the input format is GNU. // Unfortunately, the STAR format also has a sparse header format that uses // the same type flag but has a completely different layout. 
if blk.GetFormat() != FormatGNU { return nil, ErrHeader } hdr.Format.mayOnlyBe(FormatGNU) var p parser hdr.Size = p.parseNumeric(blk.GNU().RealSize()) if p.err != nil { return nil, p.err } s := blk.GNU().Sparse() spd := make(sparseDatas, 0, s.MaxEntries()) for { for i := 0; i < s.MaxEntries(); i++ { // This termination condition is identical to GNU and BSD tar. if s.Entry(i).Offset()[0] == 0x00 { break // Don't return, need to process extended headers (even if empty) } offset := p.parseNumeric(s.Entry(i).Offset()) length := p.parseNumeric(s.Entry(i).Length()) if p.err != nil { return nil, p.err } spd = append(spd, sparseEntry{Offset: offset, Length: length}) } if s.IsExtended()[0] > 0 { // There are more entries. Read an extension header and parse its entries. if _, err := mustReadFull(tr.r, blk[:]); err != nil { return nil, err } if tr.RawAccounting { tr.rawBytes.Write(blk[:]) } s = blk.Sparse() continue } return spd, nil // Done } } // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format // version 1.0. The format of the sparse map consists of a series of // newline-terminated numeric fields. The first field is the number of entries // and is always present. Following this are the entries, consisting of two // fields (offset, length). This function must stop reading at the end // boundary of the block containing the last newline. // // Note that the GNU manual says that numeric values should be encoded in octal // format. However, the GNU tar utility itself outputs these values in decimal. // As such, this library treats values as being encoded in decimal. func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { var ( cntNewline int64 buf bytes.Buffer blk block ) // feedTokens copies data in blocks from r into buf until there are // at least cnt newlines in buf. It will not read more blocks than needed. 
feedTokens := func(n int64) error { for cntNewline < n { if _, err := mustReadFull(r, blk[:]); err != nil { return err } buf.Write(blk[:]) for _, c := range blk { if c == '\n' { cntNewline++ } } } return nil } // nextToken gets the next token delimited by a newline. This assumes that // at least one newline exists in the buffer. nextToken := func() string { cntNewline-- tok, _ := buf.ReadString('\n') return strings.TrimRight(tok, "\n") } // Parse for the number of entries. // Use integer overflow resistant math to check this. if err := feedTokens(1); err != nil { return nil, err } numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } // Parse for all member entries. // numEntries is trusted after this since a potential attacker must have // committed resources proportional to what this library used. if err := feedTokens(2 * numEntries); err != nil { return nil, err } spd := make(sparseDatas, 0, numEntries) for i := int64(0); i < numEntries; i++ { offset, err1 := strconv.ParseInt(nextToken(), 10, 64) length, err2 := strconv.ParseInt(nextToken(), 10, 64) if err1 != nil || err2 != nil { return nil, ErrHeader } spd = append(spd, sparseEntry{Offset: offset, Length: length}) } return spd, nil } // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format // version 0.1. The sparse map is stored in the PAX headers. func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { // Get number of entries. // Use integer overflow resistant math to check this. numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } // There should be two numbers in sparseMap for each entry. 
sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") if len(sparseMap) == 1 && sparseMap[0] == "" { sparseMap = sparseMap[:0] } if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } // Loop through the entries in the sparse map. // numEntries is trusted now. spd := make(sparseDatas, 0, numEntries) for len(sparseMap) >= 2 { offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) if err1 != nil || err2 != nil { return nil, ErrHeader } spd = append(spd, sparseEntry{Offset: offset, Length: length}) sparseMap = sparseMap[2:] } return spd, nil } // Read reads from the current file in the tar archive. // It returns (0, io.EOF) when it reaches the end of that file, // until Next is called to advance to the next file. // // If the current file is sparse, then the regions marked as a hole // are read back as NUL-bytes. // // Calling Read on special types like TypeLink, TypeSymlink, TypeChar, // TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what // the Header.Size claims. func (tr *Reader) Read(b []byte) (int, error) { if tr.err != nil { return 0, tr.err } n, err := tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err } return n, err } // writeTo writes the content of the current file to w. // The bytes written matches the number of remaining bytes in the current file. // // If the current file is sparse and w is an io.WriteSeeker, // then writeTo uses Seek to skip past holes defined in Header.SparseHoles, // assuming that skipped regions are filled with NULs. // This always writes the last byte to ensure w is the right size. // // TODO(dsnet): Re-export this when adding sparse file support. // See https://golang.org/issue/22735 func (tr *Reader) writeTo(w io.Writer) (int64, error) { if tr.err != nil { return 0, tr.err } n, err := tr.curr.WriteTo(w) if err != nil { tr.err = err } return n, err } // regFileReader is a fileReader for reading data from a regular file entry. 
type regFileReader struct {
	r  io.Reader // Underlying Reader
	nb int64     // Number of remaining bytes to read
}

// Read reads up to len(b) bytes, never past the logical end of the entry.
// It converts a premature io.EOF from the underlying reader into
// io.ErrUnexpectedEOF, and reports io.EOF itself exactly when the entry
// is fully consumed.
func (fr *regFileReader) Read(b []byte) (n int, err error) {
	if int64(len(b)) > fr.nb {
		b = b[:fr.nb]
	}
	if len(b) > 0 {
		n, err = fr.r.Read(b)
		fr.nb -= int64(n)
	}
	switch {
	case err == io.EOF && fr.nb > 0:
		return n, io.ErrUnexpectedEOF
	case err == nil && fr.nb == 0:
		return n, io.EOF
	default:
		return n, err
	}
}

// WriteTo copies the remainder of the entry to w.
// The anonymous struct hides every method except Read so that io.Copy
// cannot bypass this type's bookkeeping via other fast-path interfaces.
func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
	return io.Copy(w, struct{ io.Reader }{fr})
}

// LogicalRemaining reports the remaining logical (apparent) size of the entry.
func (fr regFileReader) LogicalRemaining() int64 {
	return fr.nb
}

// PhysicalRemaining reports the remaining bytes stored in the archive;
// for a regular file this equals the logical remainder.
func (fr regFileReader) PhysicalRemaining() int64 {
	return fr.nb
}

// sparseFileReader is a fileReader for reading data from a sparse file entry.
type sparseFileReader struct {
	fr  fileReader  // Underlying fileReader
	sp  sparseHoles // Normalized list of sparse holes
	pos int64       // Current position in sparse file
}

// Read fills b by alternating between reading data fragments from fr and
// synthesizing NUL bytes for hole fragments, tracked against sr.sp.
func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
	finished := int64(len(b)) >= sr.LogicalRemaining()
	if finished {
		b = b[:sr.LogicalRemaining()]
	}

	b0 := b
	endPos := sr.pos + int64(len(b))
	for endPos > sr.pos && err == nil {
		var nf int // Bytes read in fragment
		holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
		if sr.pos < holeStart { // In a data fragment
			bf := b[:min(int64(len(b)), holeStart-sr.pos)]
			nf, err = tryReadFull(sr.fr, bf)
		} else { // In a hole fragment
			bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
			nf, err = tryReadFull(zeroReader{}, bf)
		}
		b = b[nf:]
		sr.pos += int64(nf)
		if sr.pos >= holeEnd && len(sr.sp) > 1 {
			sr.sp = sr.sp[1:] // Ensure last fragment always remains
		}
	}

	n = len(b0) - len(b)
	switch {
	case err == io.EOF:
		return n, errMissData // Less data in dense file than sparse file
	case err != nil:
		return n, err
	case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
		return n, errUnrefData // More data in dense file than sparse file
	case finished:
		return n, io.EOF
	default:
		return n, nil
	}
}

// WriteTo streams the entry to w. If w can truly seek, holes are skipped
// with Seek instead of writing NULs; otherwise it falls back to a plain
// copy through Read (which synthesizes the zeros).
func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
	ws, ok := w.(io.WriteSeeker)
	if ok {
		// Probe with a no-op seek: some writers implement the interface
		// but fail at runtime.
		if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
			ok = false // Not all io.Seeker can really seek
		}
	}
	if !ok {
		return io.Copy(w, struct{ io.Reader }{sr})
	}

	var writeLastByte bool
	pos0 := sr.pos
	for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
		var nf int64 // Size of fragment
		holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
		if sr.pos < holeStart { // In a data fragment
			nf = holeStart - sr.pos
			nf, err = io.CopyN(ws, sr.fr, nf)
		} else { // In a hole fragment
			nf = holeEnd - sr.pos
			if sr.PhysicalRemaining() == 0 {
				// Trailing hole: stop one byte short so the explicit
				// write below can pin the final file size.
				writeLastByte = true
				nf--
			}
			_, err = ws.Seek(nf, io.SeekCurrent)
		}
		sr.pos += nf
		if sr.pos >= holeEnd && len(sr.sp) > 1 {
			sr.sp = sr.sp[1:] // Ensure last fragment always remains
		}
	}

	// If the last fragment is a hole, then seek to 1-byte before EOF, and
	// write a single byte to ensure the file is the right size.
	if writeLastByte && err == nil {
		_, err = ws.Write([]byte{0})
		sr.pos++
	}

	n = sr.pos - pos0
	switch {
	case err == io.EOF:
		return n, errMissData // Less data in dense file than sparse file
	case err != nil:
		return n, err
	case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
		return n, errUnrefData // More data in dense file than sparse file
	default:
		return n, nil
	}
}

// LogicalRemaining reports the apparent bytes left, holes included; the
// last sparse fragment always marks the logical end of the file.
func (sr sparseFileReader) LogicalRemaining() int64 {
	return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
}

// PhysicalRemaining reports the archived (dense) bytes left, delegating
// to the underlying fileReader.
func (sr sparseFileReader) PhysicalRemaining() int64 {
	return sr.fr.PhysicalRemaining()
}

// zeroReader is an io.Reader that yields an endless stream of NUL bytes,
// used to materialize sparse holes.
type zeroReader struct{}

func (zeroReader) Read(b []byte) (int, error) {
	for i := range b {
		b[i] = 0
	}
	return len(b), nil
}

// mustReadFull is like io.ReadFull except it returns
// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read.
func mustReadFull(r io.Reader, b []byte) (int, error) {
	n, err := tryReadFull(r, b)
	if err == io.EOF {
		err = io.ErrUnexpectedEOF
	}
	return n, err
}

// tryReadFull is like io.ReadFull except it returns
// io.EOF when it is hit before len(b) bytes are read.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for len(b) > n && err == nil {
		var nn int
		nn, err = r.Read(b[n:])
		n += nn
	}
	if len(b) == n && err == io.EOF {
		err = nil
	}
	return n, err
}

// discard skips n bytes in r, reporting an error if unable to do so.
func discard(tr *Reader, n int64) error {
	var seekSkipped, copySkipped int64
	var err error
	r := tr.r
	if tr.RawAccounting {
		// Raw accounting must capture every skipped byte into rawBytes,
		// so the Seek fast path below cannot be used.
		copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n)
		goto out
	}

	// If possible, Seek to the last byte before the end of the data section.
	// Do this because Seek is often lazy about reporting errors; this will mask
	// the fact that the stream may be truncated. We can rely on the
	// io.CopyN done shortly afterwards to trigger any IO errors.
	if sr, ok := r.(io.Seeker); ok && n > 1 {
		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
		// io.Seeker, but calling Seek always returns an error and performs
		// no action. Thus, we try an innocent seek to the current position
		// to see if Seek is really supported.
		pos1, err := sr.Seek(0, io.SeekCurrent)
		if pos1 >= 0 && err == nil {
			// Seek seems supported, so perform the real Seek.
			pos2, err := sr.Seek(n-1, io.SeekCurrent)
			if pos2 < 0 || err != nil {
				return err
			}
			seekSkipped = pos2 - pos1
		}
	}
	copySkipped, err = io.CopyN(io.Discard, r, n-seekSkipped)
out:
	if err == io.EOF && seekSkipped+copySkipped < n {
		err = io.ErrUnexpectedEOF
	}
	return err
}
vbatts-tar-split-6881021/archive/tar/reader_test.go000066400000000000000000001332151467537433400222110ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar import ( "bytes" "crypto/md5" "errors" "fmt" "io" "math" "os" "path" "reflect" "strconv" "strings" "testing" "time" ) func TestReader(t *testing.T) { vectors := []struct { file string // Test input file headers []*Header // Expected output headers chksums []string // MD5 checksum of files, leave as nil if not checked err error // Expected error to occur }{{ file: "testdata/gnu.tar", headers: []*Header{{ Name: "small.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 5, ModTime: time.Unix(1244428340, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", Format: FormatGNU, }, { Name: "small2.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 11, ModTime: time.Unix(1244436044, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", Format: FormatGNU, }}, chksums: []string{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, }, { file: "testdata/sparse-formats.tar", headers: []*Header{{ Name: "sparse-gnu", Mode: 420, Uid: 1000, Gid: 1000, Size: 200, ModTime: time.Unix(1392395740, 0), Typeflag: 0x53, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, Format: FormatGNU, }, { Name: "sparse-posix-0.0", Mode: 420, Uid: 1000, Gid: 1000, Size: 200, ModTime: time.Unix(1392342187, 0), Typeflag: 0x30, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, PAXRecords: map[string]string{ "GNU.sparse.size": "200", "GNU.sparse.numblocks": "95", "GNU.sparse.map": "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", }, Format: FormatPAX, }, { Name: "sparse-posix-0.1", 
Mode: 420, Uid: 1000, Gid: 1000, Size: 200, ModTime: time.Unix(1392340456, 0), Typeflag: 0x30, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, PAXRecords: map[string]string{ "GNU.sparse.size": "200", "GNU.sparse.numblocks": "95", "GNU.sparse.map": "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", "GNU.sparse.name": "sparse-posix-0.1", }, Format: FormatPAX, }, { Name: "sparse-posix-1.0", Mode: 420, Uid: 1000, Gid: 1000, Size: 200, ModTime: time.Unix(1392337404, 0), Typeflag: 0x30, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, PAXRecords: map[string]string{ "GNU.sparse.major": "1", "GNU.sparse.minor": "0", "GNU.sparse.realsize": "200", "GNU.sparse.name": "sparse-posix-1.0", }, Format: FormatPAX, }, { Name: "end", Mode: 420, Uid: 1000, Gid: 1000, Size: 4, ModTime: time.Unix(1392398319, 0), Typeflag: 0x30, Linkname: "", Uname: "david", Gname: "david", Devmajor: 0, Devminor: 0, Format: FormatGNU, }}, chksums: []string{ "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "6f53234398c2449fe67c1812d993012f", "b0061974914468de549a2af8ced10316", }, }, { file: "testdata/star.tar", headers: []*Header{{ Name: "small.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 5, ModTime: time.Unix(1244592783, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", AccessTime: time.Unix(1244592783, 0), ChangeTime: time.Unix(1244592783, 0), }, { Name: "small2.txt", Mode: 0640, Uid: 73025, Gid: 5000, Size: 11, ModTime: 
time.Unix(1244592783, 0), Typeflag: '0', Uname: "dsymonds", Gname: "eng", AccessTime: time.Unix(1244592783, 0), ChangeTime: time.Unix(1244592783, 0), }}, }, { file: "testdata/v7.tar", headers: []*Header{{ Name: "small.txt", Mode: 0444, Uid: 73025, Gid: 5000, Size: 5, ModTime: time.Unix(1244593104, 0), Typeflag: '0', }, { Name: "small2.txt", Mode: 0444, Uid: 73025, Gid: 5000, Size: 11, ModTime: time.Unix(1244593104, 0), Typeflag: '0', }}, }, { file: "testdata/pax.tar", headers: []*Header{{ Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", Mode: 0664, Uid: 1000, Gid: 1000, Uname: "shane", Gname: "shane", Size: 7, ModTime: time.Unix(1350244992, 23960108), ChangeTime: time.Unix(1350244992, 23960108), AccessTime: time.Unix(1350244992, 23960108), Typeflag: TypeReg, PAXRecords: map[string]string{ "path": "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", "mtime": "1350244992.023960108", "atime": "1350244992.023960108", "ctime": "1350244992.023960108", }, Format: FormatPAX, }, { Name: "a/b", Mode: 0777, Uid: 1000, Gid: 1000, Uname: "shane", Gname: "shane", Size: 0, ModTime: time.Unix(1350266320, 910238425), ChangeTime: time.Unix(1350266320, 910238425), AccessTime: time.Unix(1350266320, 910238425), Typeflag: TypeSymlink, Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", PAXRecords: map[string]string{ "linkpath": "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", "mtime": 
"1350266320.910238425", "atime": "1350266320.910238425", "ctime": "1350266320.910238425", }, Format: FormatPAX, }}, }, { file: "testdata/pax-bad-hdr-file.tar", err: ErrHeader, }, { file: "testdata/pax-bad-mtime-file.tar", err: ErrHeader, }, { file: "testdata/pax-pos-size-file.tar", headers: []*Header{{ Name: "foo", Mode: 0640, Uid: 319973, Gid: 5000, Size: 999, ModTime: time.Unix(1442282516, 0), Typeflag: '0', Uname: "joetsai", Gname: "eng", PAXRecords: map[string]string{ "size": "000000000000000000000999", }, Format: FormatPAX, }}, chksums: []string{ "0afb597b283fe61b5d4879669a350556", }, }, { file: "testdata/pax-records.tar", headers: []*Header{{ Typeflag: TypeReg, Name: "file", Uname: strings.Repeat("long", 10), ModTime: time.Unix(0, 0), PAXRecords: map[string]string{ "GOLANG.pkg": "tar", "comment": "Hello, 世界", "uname": strings.Repeat("long", 10), }, Format: FormatPAX, }}, }, { file: "testdata/pax-global-records.tar", headers: []*Header{{ Typeflag: TypeXGlobalHeader, Name: "global1", PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"}, Format: FormatPAX, }, { Typeflag: TypeReg, Name: "file1", ModTime: time.Unix(0, 0), Format: FormatUSTAR, }, { Typeflag: TypeReg, Name: "file2", PAXRecords: map[string]string{"path": "file2"}, ModTime: time.Unix(0, 0), Format: FormatPAX, }, { Typeflag: TypeXGlobalHeader, Name: "GlobalHead.0.0", PAXRecords: map[string]string{"path": ""}, Format: FormatPAX, }, { Typeflag: TypeReg, Name: "file3", ModTime: time.Unix(0, 0), Format: FormatUSTAR, }, { Typeflag: TypeReg, Name: "file4", ModTime: time.Unix(1400000000, 0), PAXRecords: map[string]string{"mtime": "1400000000"}, Format: FormatPAX, }}, }, { file: "testdata/nil-uid.tar", // golang.org/issue/5290 headers: []*Header{{ Name: "P1050238.JPG.log", Mode: 0664, Uid: 0, Gid: 0, Size: 14, ModTime: time.Unix(1365454838, 0), Typeflag: TypeReg, Linkname: "", Uname: "eyefi", Gname: "eyefi", Devmajor: 0, Devminor: 0, Format: FormatGNU, }}, }, { file: "testdata/xattrs.tar", 
headers: []*Header{{ Name: "small.txt", Mode: 0644, Uid: 1000, Gid: 10, Size: 5, ModTime: time.Unix(1386065770, 448252320), Typeflag: '0', Uname: "alex", Gname: "wheel", AccessTime: time.Unix(1389782991, 419875220), ChangeTime: time.Unix(1389782956, 794414986), Xattrs: map[string]string{ "user.key": "value", "user.key2": "value2", // Interestingly, selinux encodes the terminating null inside the xattr "security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, PAXRecords: map[string]string{ "mtime": "1386065770.44825232", "atime": "1389782991.41987522", "ctime": "1389782956.794414986", "SCHILY.xattr.user.key": "value", "SCHILY.xattr.user.key2": "value2", "SCHILY.xattr.security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, Format: FormatPAX, }, { Name: "small2.txt", Mode: 0644, Uid: 1000, Gid: 10, Size: 11, ModTime: time.Unix(1386065770, 449252304), Typeflag: '0', Uname: "alex", Gname: "wheel", AccessTime: time.Unix(1389782991, 419875220), ChangeTime: time.Unix(1386065770, 449252304), Xattrs: map[string]string{ "security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, PAXRecords: map[string]string{ "mtime": "1386065770.449252304", "atime": "1389782991.41987522", "ctime": "1386065770.449252304", "SCHILY.xattr.security.selinux": "unconfined_u:object_r:default_t:s0\x00", }, Format: FormatPAX, }}, }, { // Matches the behavior of GNU, BSD, and STAR tar utilities. file: "testdata/gnu-multi-hdrs.tar", headers: []*Header{{ Name: "GNU2/GNU2/long-path-name", Linkname: "GNU4/GNU4/long-linkpath-name", ModTime: time.Unix(0, 0), Typeflag: '2', Format: FormatGNU, }}, }, { // GNU tar file with atime and ctime fields set. // Created with the GNU tar v1.27.1. 
// tar --incremental -S -cvf gnu-incremental.tar test2 file: "testdata/gnu-incremental.tar", headers: []*Header{{ Name: "test2/", Mode: 16877, Uid: 1000, Gid: 1000, Size: 14, ModTime: time.Unix(1441973427, 0), Typeflag: 'D', Uname: "rawr", Gname: "dsnet", AccessTime: time.Unix(1441974501, 0), ChangeTime: time.Unix(1441973436, 0), Format: FormatGNU, }, { Name: "test2/foo", Mode: 33188, Uid: 1000, Gid: 1000, Size: 64, ModTime: time.Unix(1441973363, 0), Typeflag: '0', Uname: "rawr", Gname: "dsnet", AccessTime: time.Unix(1441974501, 0), ChangeTime: time.Unix(1441973436, 0), Format: FormatGNU, }, { Name: "test2/sparse", Mode: 33188, Uid: 1000, Gid: 1000, Size: 536870912, ModTime: time.Unix(1441973427, 0), Typeflag: 'S', Uname: "rawr", Gname: "dsnet", AccessTime: time.Unix(1441991948, 0), ChangeTime: time.Unix(1441973436, 0), Format: FormatGNU, }}, }, { // Matches the behavior of GNU and BSD tar utilities. file: "testdata/pax-multi-hdrs.tar", headers: []*Header{{ Name: "bar", Linkname: "PAX4/PAX4/long-linkpath-name", ModTime: time.Unix(0, 0), Typeflag: '2', PAXRecords: map[string]string{ "linkpath": "PAX4/PAX4/long-linkpath-name", }, Format: FormatPAX, }}, }, { // Both BSD and GNU tar truncate long names at first NUL even // if there is data following that NUL character. // This is reasonable as GNU long names are C-strings. file: "testdata/gnu-long-nul.tar", headers: []*Header{{ Name: "0123456789", Mode: 0644, Uid: 1000, Gid: 1000, ModTime: time.Unix(1486082191, 0), Typeflag: '0', Uname: "rawr", Gname: "dsnet", Format: FormatGNU, }}, }, { // This archive was generated by Writer but is readable by both // GNU and BSD tar utilities. // The archive generated by GNU is nearly byte-for-byte identical // to the Go version except the Go version sets a negative Devminor // just to force the GNU format. 
file: "testdata/gnu-utf8.tar", headers: []*Header{{ Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", Mode: 0644, Uid: 1000, Gid: 1000, ModTime: time.Unix(0, 0), Typeflag: '0', Uname: "☺", Gname: "⚹", Format: FormatGNU, }}, }, { // This archive was generated by Writer but is readable by both // GNU and BSD tar utilities. // The archive generated by GNU is nearly byte-for-byte identical // to the Go version except the Go version sets a negative Devminor // just to force the GNU format. file: "testdata/gnu-not-utf8.tar", headers: []*Header{{ Name: "hi\x80\x81\x82\x83bye", Mode: 0644, Uid: 1000, Gid: 1000, ModTime: time.Unix(0, 0), Typeflag: '0', Uname: "rawr", Gname: "dsnet", Format: FormatGNU, }}, }, { // BSD tar v3.1.2 and GNU tar v1.27.1 both rejects PAX records // with NULs in the key. file: "testdata/pax-nul-xattrs.tar", err: ErrHeader, }, { // BSD tar v3.1.2 rejects a PAX path with NUL in the value, while // GNU tar v1.27.1 simply truncates at first NUL. // We emulate the behavior of BSD since it is strange doing NUL // truncations since PAX records are length-prefix strings instead // of NUL-terminated C-strings. file: "testdata/pax-nul-path.tar", err: ErrHeader, }, { file: "testdata/neg-size.tar", err: ErrHeader, }, { file: "testdata/issue10968.tar", err: ErrHeader, }, { file: "testdata/issue11169.tar", err: ErrHeader, }, { file: "testdata/issue12435.tar", err: ErrHeader, }, { // Ensure that we can read back the original Header as written with // a buggy pre-Go1.8 tar.Writer. file: "testdata/invalid-go17.tar", headers: []*Header{{ Name: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", Uid: 010000000, ModTime: time.Unix(0, 0), Typeflag: '0', }}, }, { // USTAR archive with a regular entry with non-zero device numbers. 
file: "testdata/ustar-file-devs.tar", headers: []*Header{{ Name: "file", Mode: 0644, Typeflag: '0', ModTime: time.Unix(0, 0), Devmajor: 1, Devminor: 1, Format: FormatUSTAR, }}, }, { // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. file: "testdata/gnu-nil-sparse-data.tar", headers: []*Header{{ Name: "sparse.db", Typeflag: TypeGNUSparse, Size: 1000, ModTime: time.Unix(0, 0), Format: FormatGNU, }}, }, { // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. file: "testdata/gnu-nil-sparse-hole.tar", headers: []*Header{{ Name: "sparse.db", Typeflag: TypeGNUSparse, Size: 1000, ModTime: time.Unix(0, 0), Format: FormatGNU, }}, }, { // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. file: "testdata/pax-nil-sparse-data.tar", headers: []*Header{{ Name: "sparse.db", Typeflag: TypeReg, Size: 1000, ModTime: time.Unix(0, 0), PAXRecords: map[string]string{ "size": "1512", "GNU.sparse.major": "1", "GNU.sparse.minor": "0", "GNU.sparse.realsize": "1000", "GNU.sparse.name": "sparse.db", }, Format: FormatPAX, }}, }, { // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. file: "testdata/pax-nil-sparse-hole.tar", headers: []*Header{{ Name: "sparse.db", Typeflag: TypeReg, Size: 1000, ModTime: time.Unix(0, 0), PAXRecords: map[string]string{ "size": "512", "GNU.sparse.major": "1", "GNU.sparse.minor": "0", "GNU.sparse.realsize": "1000", "GNU.sparse.name": "sparse.db", }, Format: FormatPAX, }}, }, { file: "testdata/trailing-slash.tar", headers: []*Header{{ Typeflag: TypeDir, Name: strings.Repeat("123456789/", 30), ModTime: time.Unix(0, 0), PAXRecords: map[string]string{ "path": strings.Repeat("123456789/", 30), }, Format: FormatPAX, }}, }} for _, v := range vectors { t.Run(path.Base(v.file), func(t *testing.T) { f, err := os.Open(v.file) if err != nil { t.Fatalf("unexpected error: %v", err) } defer f.Close() // Capture all headers and checksums. 
var ( tr = NewReader(f) hdrs []*Header chksums []string rdbuf = make([]byte, 8) ) for { var hdr *Header hdr, err = tr.Next() if err != nil { if err == io.EOF { err = nil // Expected error } break } hdrs = append(hdrs, hdr) if v.chksums == nil { continue } h := md5.New() _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read if err != nil { break } chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) } for i, hdr := range hdrs { if i >= len(v.headers) { t.Fatalf("entry %d: unexpected header:\ngot %+v", i, *hdr) continue } if !reflect.DeepEqual(*hdr, *v.headers[i]) { t.Fatalf("entry %d: incorrect header:\ngot %+v\nwant %+v", i, *hdr, *v.headers[i]) } } if len(hdrs) != len(v.headers) { t.Fatalf("got %d headers, want %d headers", len(hdrs), len(v.headers)) } for i, sum := range chksums { if i >= len(v.chksums) { t.Fatalf("entry %d: unexpected sum: got %s", i, sum) continue } if sum != v.chksums[i] { t.Fatalf("entry %d: incorrect checksum: got %s, want %s", i, sum, v.chksums[i]) } } if err != v.err { t.Fatalf("unexpected error: got %v, want %v", err, v.err) } f.Close() }) } } func TestPartialRead(t *testing.T) { type testCase struct { cnt int // Number of bytes to read output string // Expected value of string read } vectors := []struct { file string cases []testCase }{{ file: "testdata/gnu.tar", cases: []testCase{ {4, "Kilt"}, {6, "Google"}, }, }, { file: "testdata/sparse-formats.tar", cases: []testCase{ {2, "\x00G"}, {4, "\x00G\x00o"}, {6, "\x00G\x00o\x00G"}, {8, "\x00G\x00o\x00G\x00o"}, {4, "end\n"}, }, }} for _, v := range vectors { t.Run(path.Base(v.file), func(t *testing.T) { f, err := os.Open(v.file) if err != nil { t.Fatalf("Open() error: %v", err) } defer f.Close() tr := NewReader(f) for i, tc := range v.cases { hdr, err := tr.Next() if err != nil || hdr == nil { t.Fatalf("entry %d, Next(): got %v, want %v", i, err, nil) } buf := make([]byte, tc.cnt) if _, err := io.ReadFull(tr, buf); err != nil { t.Fatalf("entry %d, ReadFull(): got %v, 
want %v", i, err, nil) } if string(buf) != tc.output { t.Fatalf("entry %d, ReadFull(): got %q, want %q", i, string(buf), tc.output) } } if _, err := tr.Next(); err != io.EOF { t.Fatalf("Next(): got %v, want EOF", err) } }) } } func TestUninitializedRead(t *testing.T) { f, err := os.Open("testdata/gnu.tar") if err != nil { t.Fatalf("Unexpected error: %v", err) } defer f.Close() tr := NewReader(f) _, err = tr.Read([]byte{}) if err == nil || err != io.EOF { t.Errorf("Unexpected error: %v, wanted %v", err, io.EOF) } } type reader struct{ io.Reader } type readSeeker struct{ io.ReadSeeker } type readBadSeeker struct{ io.ReadSeeker } func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } // TestReadTruncation test the ending condition on various truncated files and // that truncated files are still detected even if the underlying io.Reader // satisfies io.Seeker. func TestReadTruncation(t *testing.T) { var ss []string for _, p := range []string{ "testdata/gnu.tar", "testdata/ustar-file-reg.tar", "testdata/pax-path-hdr.tar", "testdata/sparse-formats.tar", } { buf, err := os.ReadFile(p) if err != nil { t.Fatalf("unexpected error: %v", err) } ss = append(ss, string(buf)) } data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] data2 += strings.Repeat("\x00", 10*512) trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes vectors := []struct { input string // Input stream cnt int // Expected number of headers read err error // Expected error outcome }{ {"", 0, io.EOF}, // Empty file is a "valid" tar file {data1[:511], 0, io.ErrUnexpectedEOF}, {data1[:512], 1, io.ErrUnexpectedEOF}, {data1[:1024], 1, io.EOF}, {data1[:1536], 2, io.ErrUnexpectedEOF}, {data1[:2048], 2, io.EOF}, {data1, 2, io.EOF}, {data1[:2048] + data2[:1536], 3, io.EOF}, {data2[:511], 0, io.ErrUnexpectedEOF}, {data2[:512], 1, io.ErrUnexpectedEOF}, {data2[:1195], 1, io.ErrUnexpectedEOF}, {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding 
{data2[:1200], 1, io.EOF}, {data2[:1535], 1, io.EOF}, {data2[:1536], 1, io.EOF}, // Exact end of padding {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, {data2[:1536] + trash, 1, ErrHeader}, {data2[:2048], 1, io.EOF}, // Exactly 1 empty block {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, {data2[:2048] + trash, 1, ErrHeader}, {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) {data2[:2560] + trash[:1], 1, io.EOF}, {data2[:2560] + trash[:511], 1, io.EOF}, {data2[:2560] + trash, 1, io.EOF}, {data2[:3072], 1, io.EOF}, {pax, 0, io.EOF}, // PAX header without data is a "valid" tar file {pax + trash[:1], 0, io.ErrUnexpectedEOF}, {pax + trash[:511], 0, io.ErrUnexpectedEOF}, {sparse[:511], 0, io.ErrUnexpectedEOF}, {sparse[:512], 0, io.ErrUnexpectedEOF}, {sparse[:3584], 1, io.EOF}, {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header {sparse[:9216], 1, io.EOF}, {sparse[:9728], 2, io.ErrUnexpectedEOF}, {sparse[:10240], 2, io.EOF}, {sparse[:11264], 2, io.ErrUnexpectedEOF}, {sparse, 5, io.EOF}, {sparse + trash, 5, io.EOF}, } for i, v := range vectors { for j := 0; j < 6; j++ { var tr *Reader var s1, s2 string switch j { case 0: tr = NewReader(&reader{strings.NewReader(v.input)}) s1, s2 = "io.Reader", "auto" case 1: tr = NewReader(&reader{strings.NewReader(v.input)}) s1, s2 = "io.Reader", "manual" case 2: tr = NewReader(&readSeeker{strings.NewReader(v.input)}) s1, s2 = "io.ReadSeeker", "auto" case 3: tr = NewReader(&readSeeker{strings.NewReader(v.input)}) s1, s2 = "io.ReadSeeker", "manual" case 4: tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) s1, s2 = "ReadBadSeeker", "auto" case 5: tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) s1, s2 = "ReadBadSeeker", "manual" } var cnt int var err error for { if _, err = tr.Next(); err != nil { break } cnt++ if s2 == "manual" { if _, err = 
tr.writeTo(io.Discard); err != nil { break } } } if err != v.err { t.Errorf("test %d, NewReader(%s) with %s discard: got %v, want %v", i, s1, s2, err, v.err) } if cnt != v.cnt { t.Errorf("test %d, NewReader(%s) with %s discard: got %d headers, want %d headers", i, s1, s2, cnt, v.cnt) } } } } // TestReadHeaderOnly tests that Reader does not attempt to read special // header-only files. func TestReadHeaderOnly(t *testing.T) { f, err := os.Open("testdata/hdr-only.tar") if err != nil { t.Fatalf("unexpected error: %v", err) } defer f.Close() var hdrs []*Header tr := NewReader(f) for { hdr, err := tr.Next() if err == io.EOF { break } if err != nil { t.Errorf("Next(): got %v, want %v", err, nil) continue } hdrs = append(hdrs, hdr) // If a special flag, we should read nothing. cnt, _ := io.ReadFull(tr, []byte{0}) if cnt > 0 && hdr.Typeflag != TypeReg { t.Errorf("ReadFull(...): got %d bytes, want 0 bytes", cnt) } } // File is crafted with 16 entries. The later 8 are identical to the first // 8 except that the size is set. 
if len(hdrs) != 16 { t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) } for i := 0; i < 8; i++ { hdr1, hdr2 := hdrs[i+0], hdrs[i+8] hdr1.Size, hdr2.Size = 0, 0 if !reflect.DeepEqual(*hdr1, *hdr2) { t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) } } } func TestMergePAX(t *testing.T) { vectors := []struct { in map[string]string want *Header ok bool }{{ in: map[string]string{ "path": "a/b/c", "uid": "1000", "mtime": "1350244992.023960108", }, want: &Header{ Name: "a/b/c", Uid: 1000, ModTime: time.Unix(1350244992, 23960108), PAXRecords: map[string]string{ "path": "a/b/c", "uid": "1000", "mtime": "1350244992.023960108", }, }, ok: true, }, { in: map[string]string{ "gid": "gtgergergersagersgers", }, ok: false, }, { in: map[string]string{ "missing": "missing", "SCHILY.xattr.key": "value", }, want: &Header{ Xattrs: map[string]string{"key": "value"}, PAXRecords: map[string]string{ "missing": "missing", "SCHILY.xattr.key": "value", }, }, ok: true, }} for i, v := range vectors { got := new(Header) err := mergePAX(got, v.in) if v.ok && !reflect.DeepEqual(*got, *v.want) { t.Errorf("test %d, mergePAX(...):\ngot %+v\nwant %+v", i, *got, *v.want) } if ok := err == nil; ok != v.ok { t.Errorf("test %d, mergePAX(...): got %v, want %v", i, ok, v.ok) } } } func TestParsePAX(t *testing.T) { vectors := []struct { in string want map[string]string ok bool }{ {"", nil, true}, {"6 k=1\n", map[string]string{"k": "1"}, true}, {"10 a=name\n", map[string]string{"a": "name"}, true}, {"9 a=name\n", map[string]string{"a": "name"}, true}, {"30 mtime=1350244992.023960108\n", map[string]string{"mtime": "1350244992.023960108"}, true}, {"3 somelongkey=\n", nil, false}, {"50 tooshort=\n", nil, false}, {"13 key1=haha\n13 key2=nana\n13 key3=kaka\n", map[string]string{"key1": "haha", "key2": "nana", "key3": "kaka"}, true}, {"13 key1=val1\n13 key2=val2\n8 key1=\n", map[string]string{"key1": "", "key2": "val2"}, true}, {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=2\n" + "23 
GNU.sparse.offset=1\n25 GNU.sparse.numbytes=2\n" + "23 GNU.sparse.offset=3\n25 GNU.sparse.numbytes=4\n", map[string]string{paxGNUSparseSize: "10", paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "1,2,3,4"}, true}, {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + "25 GNU.sparse.numbytes=2\n23 GNU.sparse.offset=1\n", nil, false}, {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + "25 GNU.sparse.offset=1,2\n25 GNU.sparse.numbytes=2\n", nil, false}, } for i, v := range vectors { r := strings.NewReader(v.in) got, err := parsePAX(r) if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { t.Errorf("test %d, parsePAX():\ngot %v\nwant %v", i, got, v.want) } if ok := err == nil; ok != v.ok { t.Errorf("test %d, parsePAX(): got %v, want %v", i, ok, v.ok) } } } func TestReadOldGNUSparseMap(t *testing.T) { populateSparseMap := func(sa sparseArray, sps []string) []string { for i := 0; len(sps) > 0 && i < sa.MaxEntries(); i++ { copy(sa.Entry(i), sps[0]) sps = sps[1:] } if len(sps) > 0 { copy(sa.IsExtended(), "\x80") } return sps } makeInput := func(format Format, size string, sps ...string) (out []byte) { // Write the initial GNU header. var blk block gnu := blk.GNU() sparse := gnu.Sparse() copy(gnu.RealSize(), size) sps = populateSparseMap(sparse, sps) if format != FormatUnknown { blk.SetFormat(format) } out = append(out, blk[:]...) // Write extended sparse blocks. for len(sps) > 0 { var blk block sps = populateSparseMap(blk.Sparse(), sps) out = append(out, blk[:]...) 
} return out } makeSparseStrings := func(sp []sparseEntry) (out []string) { var f formatter for _, s := range sp { var b [24]byte f.formatNumeric(b[:12], s.Offset) f.formatNumeric(b[12:], s.Length) out = append(out, string(b[:])) } return out } vectors := []struct { input []byte wantMap sparseDatas wantSize int64 wantErr error }{{ input: makeInput(FormatUnknown, ""), wantErr: ErrHeader, }, { input: makeInput(FormatGNU, "1234", "fewa"), wantSize: 01234, wantErr: ErrHeader, }, { input: makeInput(FormatGNU, "0031"), wantSize: 031, }, { input: makeInput(FormatGNU, "80"), wantErr: ErrHeader, }, { input: makeInput(FormatGNU, "1234", makeSparseStrings(sparseDatas{{0, 0}, {1, 1}})...), wantMap: sparseDatas{{0, 0}, {1, 1}}, wantSize: 01234, }, { input: makeInput(FormatGNU, "1234", append(makeSparseStrings(sparseDatas{{0, 0}, {1, 1}}), []string{"", "blah"}...)...), wantMap: sparseDatas{{0, 0}, {1, 1}}, wantSize: 01234, }, { input: makeInput(FormatGNU, "3333", makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}})...), wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, wantSize: 03333, }, { input: makeInput(FormatGNU, "", append(append( makeSparseStrings(sparseDatas{{0, 1}, {2, 1}}), []string{"", ""}...), makeSparseStrings(sparseDatas{{4, 1}, {6, 1}})...)...), wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, }, { input: makeInput(FormatGNU, "", makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:blockSize], wantErr: io.ErrUnexpectedEOF, }, { input: makeInput(FormatGNU, "", makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:3*blockSize/2], wantErr: io.ErrUnexpectedEOF, }, { input: makeInput(FormatGNU, "", makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...), wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}}, }, { input: makeInput(FormatGNU, "", makeSparseStrings(sparseDatas{{10 << 30, 512}, {20 << 30, 512}})...), wantMap: sparseDatas{{10 << 30, 
512}, {20 << 30, 512}}, }} for i, v := range vectors { var blk block var hdr Header v.input = v.input[copy(blk[:], v.input):] tr := Reader{r: bytes.NewReader(v.input)} got, err := tr.readOldGNUSparseMap(&hdr, &blk) if !equalSparseEntries(got, v.wantMap) { t.Errorf("test %d, readOldGNUSparseMap(): got %v, want %v", i, got, v.wantMap) } if err != v.wantErr { t.Errorf("test %d, readOldGNUSparseMap() = %v, want %v", i, err, v.wantErr) } if hdr.Size != v.wantSize { t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) } } } func TestReadGNUSparsePAXHeaders(t *testing.T) { padInput := func(s string) string { return s + string(zeroBlock[:blockPadding(int64(len(s)))]) } vectors := []struct { inputData string inputHdrs map[string]string wantMap sparseDatas wantSize int64 wantName string wantErr error }{{ inputHdrs: nil, wantErr: nil, }, { inputHdrs: map[string]string{ paxGNUSparseNumBlocks: strconv.FormatInt(math.MaxInt64, 10), paxGNUSparseMap: "0,1,2,3", }, wantErr: ErrHeader, }, { inputHdrs: map[string]string{ paxGNUSparseNumBlocks: "4\x00", paxGNUSparseMap: "0,1,2,3", }, wantErr: ErrHeader, }, { inputHdrs: map[string]string{ paxGNUSparseNumBlocks: "4", paxGNUSparseMap: "0,1,2,3", }, wantErr: ErrHeader, }, { inputHdrs: map[string]string{ paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "0,1,2,3", }, wantMap: sparseDatas{{0, 1}, {2, 3}}, }, { inputHdrs: map[string]string{ paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "0, 1,2,3", }, wantErr: ErrHeader, }, { inputHdrs: map[string]string{ paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "0,1,02,3", paxGNUSparseRealSize: "4321", }, wantMap: sparseDatas{{0, 1}, {2, 3}}, wantSize: 4321, }, { inputHdrs: map[string]string{ paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "0,one1,2,3", }, wantErr: ErrHeader, }, { inputHdrs: map[string]string{ paxGNUSparseMajor: "0", paxGNUSparseMinor: "0", paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "0,1,2,3", paxGNUSparseSize: "1234", paxGNUSparseRealSize: "4321", paxGNUSparseName: 
"realname", }, wantMap: sparseDatas{{0, 1}, {2, 3}}, wantSize: 1234, wantName: "realname", }, { inputHdrs: map[string]string{ paxGNUSparseMajor: "0", paxGNUSparseMinor: "0", paxGNUSparseNumBlocks: "1", paxGNUSparseMap: "10737418240,512", paxGNUSparseSize: "10737418240", paxGNUSparseName: "realname", }, wantMap: sparseDatas{{10737418240, 512}}, wantSize: 10737418240, wantName: "realname", }, { inputHdrs: map[string]string{ paxGNUSparseMajor: "0", paxGNUSparseMinor: "0", paxGNUSparseNumBlocks: "0", paxGNUSparseMap: "", }, wantMap: sparseDatas{}, }, { inputHdrs: map[string]string{ paxGNUSparseMajor: "0", paxGNUSparseMinor: "1", paxGNUSparseNumBlocks: "4", paxGNUSparseMap: "0,5,10,5,20,5,30,5", }, wantMap: sparseDatas{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, }, { inputHdrs: map[string]string{ paxGNUSparseMajor: "1", paxGNUSparseMinor: "0", paxGNUSparseNumBlocks: "4", paxGNUSparseMap: "0,5,10,5,20,5,30,5", }, wantErr: io.ErrUnexpectedEOF, }, { inputData: padInput("0\n"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantMap: sparseDatas{}, }, { inputData: padInput("0\n")[:blockSize-1] + "#", inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantMap: sparseDatas{}, }, { inputData: padInput("0"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantErr: io.ErrUnexpectedEOF, }, { inputData: padInput("ab\n"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantErr: ErrHeader, }, { inputData: padInput("1\n2\n3\n"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantMap: sparseDatas{{2, 3}}, }, { inputData: padInput("1\n2\n"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantErr: io.ErrUnexpectedEOF, }, { inputData: padInput("1\n2\n\n"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantErr: ErrHeader, }, { inputData: string(zeroBlock[:]) + padInput("0\n"), inputHdrs: 
map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantErr: ErrHeader, }, { inputData: strings.Repeat("0", blockSize) + padInput("1\n5\n1\n"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantMap: sparseDatas{{5, 1}}, }, { inputData: padInput(fmt.Sprintf("%d\n", int64(math.MaxInt64))), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantErr: ErrHeader, }, { inputData: padInput(strings.Repeat("0", 300) + "1\n" + strings.Repeat("0", 1000) + "5\n" + strings.Repeat("0", 800) + "2\n"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantMap: sparseDatas{{5, 2}}, }, { inputData: padInput("2\n10737418240\n512\n21474836480\n512\n"), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantMap: sparseDatas{{10737418240, 512}, {21474836480, 512}}, }, { inputData: padInput("100\n" + func() string { var ss []string for i := 0; i < 100; i++ { ss = append(ss, fmt.Sprintf("%d\n%d\n", int64(i)<<30, 512)) } return strings.Join(ss, "") }()), inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, wantMap: func() (spd sparseDatas) { for i := 0; i < 100; i++ { spd = append(spd, sparseEntry{int64(i) << 30, 512}) } return spd }(), }} for i, v := range vectors { var hdr Header hdr.PAXRecords = v.inputHdrs r := strings.NewReader(v.inputData + "#") // Add canary byte tr := Reader{curr: ®FileReader{r, int64(r.Len())}} got, err := tr.readGNUSparsePAXHeaders(&hdr) if !equalSparseEntries(got, v.wantMap) { t.Errorf("test %d, readGNUSparsePAXHeaders(): got %v, want %v", i, got, v.wantMap) } if err != v.wantErr { t.Errorf("test %d, readGNUSparsePAXHeaders() = %v, want %v", i, err, v.wantErr) } if hdr.Size != v.wantSize { t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) } if hdr.Name != v.wantName { t.Errorf("test %d, Header.Name = %s, want %s", i, hdr.Name, v.wantName) } if v.wantErr == nil && r.Len() == 0 { 
t.Errorf("test %d, canary byte unexpectedly consumed", i) } } } // testNonEmptyReader wraps an io.Reader and ensures that // Read is never called with an empty buffer. type testNonEmptyReader struct{ io.Reader } func (r testNonEmptyReader) Read(b []byte) (int, error) { if len(b) == 0 { return 0, errors.New("unexpected empty Read call") } return r.Reader.Read(b) } func TestFileReader(t *testing.T) { type ( testRead struct { // Read(cnt) == (wantStr, wantErr) cnt int wantStr string wantErr error } testWriteTo struct { // WriteTo(testFile{ops}) == (wantCnt, wantErr) ops fileOps wantCnt int64 wantErr error } testRemaining struct { // LogicalRemaining() == wantLCnt, PhysicalRemaining() == wantPCnt wantLCnt int64 wantPCnt int64 } testFnc interface{} // testRead | testWriteTo | testRemaining ) type ( makeReg struct { str string size int64 } makeSparse struct { makeReg makeReg spd sparseDatas size int64 } fileMaker interface{} // makeReg | makeSparse ) vectors := []struct { maker fileMaker tests []testFnc }{{ maker: makeReg{"", 0}, tests: []testFnc{ testRemaining{0, 0}, testRead{0, "", io.EOF}, testRead{1, "", io.EOF}, testWriteTo{nil, 0, nil}, testRemaining{0, 0}, }, }, { maker: makeReg{"", 1}, tests: []testFnc{ testRemaining{1, 1}, testRead{5, "", io.ErrUnexpectedEOF}, testWriteTo{nil, 0, io.ErrUnexpectedEOF}, testRemaining{1, 1}, }, }, { maker: makeReg{"hello", 5}, tests: []testFnc{ testRemaining{5, 5}, testRead{5, "hello", io.EOF}, testRemaining{0, 0}, }, }, { maker: makeReg{"hello, world", 50}, tests: []testFnc{ testRemaining{50, 50}, testRead{7, "hello, ", nil}, testRemaining{43, 43}, testRead{5, "world", nil}, testRemaining{38, 38}, testWriteTo{nil, 0, io.ErrUnexpectedEOF}, testRead{1, "", io.ErrUnexpectedEOF}, testRemaining{38, 38}, }, }, { maker: makeReg{"hello, world", 5}, tests: []testFnc{ testRemaining{5, 5}, testRead{0, "", nil}, testRead{4, "hell", nil}, testRemaining{1, 1}, testWriteTo{fileOps{"o"}, 1, nil}, testRemaining{0, 0}, testWriteTo{nil, 0, nil}, 
testRead{0, "", io.EOF}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, tests: []testFnc{ testRemaining{8, 5}, testRead{3, "ab\x00", nil}, testRead{10, "\x00\x00cde", io.EOF}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, tests: []testFnc{ testRemaining{8, 5}, testWriteTo{fileOps{"ab", int64(3), "cde"}, 8, nil}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, tests: []testFnc{ testRemaining{10, 5}, testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, tests: []testFnc{ testRemaining{10, 5}, testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF}, testRemaining{4, 2}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8}, tests: []testFnc{ testRemaining{8, 5}, testRead{8, "\x00abc\x00\x00de", io.EOF}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, tests: []testFnc{ testRemaining{8, 5}, testRead{8, "\x00abc\x00\x00de", io.EOF}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, tests: []testFnc{ testRemaining{8, 5}, testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, nil}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, tests: []testFnc{ testWriteTo{fileOps{int64(1), "abc", int64(2), "de", int64(1), "\x00"}, 10, nil}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, }, }, { maker: makeSparse{makeReg{"", 0}, sparseDatas{}, 2}, tests: 
[]testFnc{ testRead{100, "\x00\x00", io.EOF}, }, }, { maker: makeSparse{makeReg{"", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00", io.ErrUnexpectedEOF}, }, }, { maker: makeSparse{makeReg{"ab", 2}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00ab", errMissData}, }, }, { maker: makeSparse{makeReg{"ab", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00ab", io.ErrUnexpectedEOF}, }, }, { maker: makeSparse{makeReg{"abc", 3}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00", errMissData}, }, }, { maker: makeSparse{makeReg{"abc", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de", errMissData}, }, }, { maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, errMissData}, }, }, { maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF}, }, }, { maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRemaining{15, 13}, testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData}, testWriteTo{nil, 0, errUnrefData}, testRemaining{0, 5}, }, }, { maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, tests: []testFnc{ testRemaining{15, 13}, testWriteTo{fileOps{int64(1), "abc", int64(2), "defgh", int64(4)}, 15, errUnrefData}, testRead{100, "", errUnrefData}, testRemaining{0, 5}, }, }} for i, v := range vectors { var fr fileReader switch maker := v.maker.(type) { case makeReg: r := testNonEmptyReader{strings.NewReader(maker.str)} fr = ®FileReader{r, maker.size} case makeSparse: if !validateSparseEntries(maker.spd, 
maker.size) { t.Fatalf("invalid sparse map: %v", maker.spd) } sph := invertSparseEntries(maker.spd, maker.size) r := testNonEmptyReader{strings.NewReader(maker.makeReg.str)} fr = ®FileReader{r, maker.makeReg.size} fr = &sparseFileReader{fr, sph, 0} default: t.Fatalf("test %d, unknown make operation: %T", i, maker) } for j, tf := range v.tests { switch tf := tf.(type) { case testRead: b := make([]byte, tf.cnt) n, err := fr.Read(b) if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr { t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr) } case testWriteTo: f := &testFile{ops: tf.ops} got, err := fr.WriteTo(f) if _, ok := err.(testError); ok { t.Errorf("test %d.%d, WriteTo(): %v", i, j, err) } else if got != tf.wantCnt || err != tf.wantErr { t.Errorf("test %d.%d, WriteTo() = (%d, %v), want (%d, %v)", i, j, got, err, tf.wantCnt, tf.wantErr) } if len(f.ops) > 0 { t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops)) } case testRemaining: if got := fr.LogicalRemaining(); got != tf.wantLCnt { t.Errorf("test %d.%d, LogicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt) } if got := fr.PhysicalRemaining(); got != tf.wantPCnt { t.Errorf("test %d.%d, PhysicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt) } default: t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) } } } } vbatts-tar-split-6881021/archive/tar/stat_actime1.go000066400000000000000000000006361467537433400222660ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
// +build linux dragonfly openbsd solaris package tar import ( "syscall" "time" ) func statAtime(st *syscall.Stat_t) time.Time { return time.Unix(st.Atim.Unix()) } func statCtime(st *syscall.Stat_t) time.Time { return time.Unix(st.Ctim.Unix()) } vbatts-tar-split-6881021/archive/tar/stat_actime2.go000066400000000000000000000006361467537433400222670ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build darwin freebsd netbsd package tar import ( "syscall" "time" ) func statAtime(st *syscall.Stat_t) time.Time { return time.Unix(st.Atimespec.Unix()) } func statCtime(st *syscall.Stat_t) time.Time { return time.Unix(st.Ctimespec.Unix()) } vbatts-tar-split-6881021/archive/tar/stat_unix.go000066400000000000000000000060051467537433400217220ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build linux darwin dragonfly freebsd openbsd netbsd solaris package tar import ( "os" "os/user" "runtime" "strconv" "sync" "syscall" ) func init() { sysStat = statUnix } // userMap and groupMap caches UID and GID lookups for performance reasons. // The downside is that renaming uname or gname by the OS never takes effect. var userMap, groupMap sync.Map // map[int]string func statUnix(fi os.FileInfo, h *Header) error { sys, ok := fi.Sys().(*syscall.Stat_t) if !ok { return nil } h.Uid = int(sys.Uid) h.Gid = int(sys.Gid) // Best effort at populating Uname and Gname. // The os/user functions may fail for any number of reasons // (not implemented on that platform, cgo not enabled, etc). 
if u, ok := userMap.Load(h.Uid); ok { h.Uname = u.(string) } else if u, err := user.LookupId(strconv.Itoa(h.Uid)); err == nil { h.Uname = u.Username userMap.Store(h.Uid, h.Uname) } if g, ok := groupMap.Load(h.Gid); ok { h.Gname = g.(string) } else if g, err := user.LookupGroupId(strconv.Itoa(h.Gid)); err == nil { h.Gname = g.Name groupMap.Store(h.Gid, h.Gname) } h.AccessTime = statAtime(sys) h.ChangeTime = statCtime(sys) // Best effort at populating Devmajor and Devminor. if h.Typeflag == TypeChar || h.Typeflag == TypeBlock { dev := uint64(sys.Rdev) // May be int32 or uint32 switch runtime.GOOS { case "linux": // Copied from golang.org/x/sys/unix/dev_linux.go. major := uint32((dev & 0x00000000000fff00) >> 8) major |= uint32((dev & 0xfffff00000000000) >> 32) minor := uint32((dev & 0x00000000000000ff) >> 0) minor |= uint32((dev & 0x00000ffffff00000) >> 12) h.Devmajor, h.Devminor = int64(major), int64(minor) case "darwin": // Copied from golang.org/x/sys/unix/dev_darwin.go. major := uint32((dev >> 24) & 0xff) minor := uint32(dev & 0xffffff) h.Devmajor, h.Devminor = int64(major), int64(minor) case "dragonfly": // Copied from golang.org/x/sys/unix/dev_dragonfly.go. major := uint32((dev >> 8) & 0xff) minor := uint32(dev & 0xffff00ff) h.Devmajor, h.Devminor = int64(major), int64(minor) case "freebsd": // Copied from golang.org/x/sys/unix/dev_freebsd.go. major := uint32((dev >> 8) & 0xff) minor := uint32(dev & 0xffff00ff) h.Devmajor, h.Devminor = int64(major), int64(minor) case "netbsd": // Copied from golang.org/x/sys/unix/dev_netbsd.go. major := uint32((dev & 0x000fff00) >> 8) minor := uint32((dev & 0x000000ff) >> 0) minor |= uint32((dev & 0xfff00000) >> 12) h.Devmajor, h.Devminor = int64(major), int64(minor) case "openbsd": // Copied from golang.org/x/sys/unix/dev_openbsd.go. 
major := uint32((dev & 0x0000ff00) >> 8) minor := uint32((dev & 0x000000ff) >> 0) minor |= uint32((dev & 0xffff0000) >> 8) h.Devmajor, h.Devminor = int64(major), int64(minor) default: // TODO: Implement solaris (see https://golang.org/issue/8106) } } return nil } vbatts-tar-split-6881021/archive/tar/strconv.go000066400000000000000000000217251467537433400214100ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar import ( "bytes" "fmt" "strconv" "strings" "time" ) // hasNUL reports whether the NUL character exists within s. func hasNUL(s string) bool { return strings.IndexByte(s, 0) >= 0 } // isASCII reports whether the input is an ASCII C-style string. func isASCII(s string) bool { for _, c := range s { if c >= 0x80 || c == 0x00 { return false } } return true } // toASCII converts the input to an ASCII C-style string. // This a best effort conversion, so invalid characters are dropped. func toASCII(s string) string { if isASCII(s) { return s } b := make([]byte, 0, len(s)) for _, c := range s { if c < 0x80 && c != 0x00 { b = append(b, byte(c)) } } return string(b) } type parser struct { err error // Last error seen } type formatter struct { err error // Last error seen } // parseString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. func (*parser) parseString(b []byte) string { if i := bytes.IndexByte(b, 0); i >= 0 { return string(b[:i]) } return string(b) } // formatString copies s into b, NUL-terminating if possible. 
func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { f.err = ErrFieldTooLong } copy(b, s) if len(s) < len(b) { b[len(s)] = 0 } // Some buggy readers treat regular files with a trailing slash // in the V7 path field as a directory even though the full path // recorded elsewhere (e.g., via PAX record) contains no trailing slash. if len(s) > len(b) && b[len(b)-1] == '/' { n := len(strings.TrimRight(s[:len(b)], "/")) b[n] = 0 // Replace trailing slash with NUL terminator } } // fitsInBase256 reports whether x can be encoded into n bytes using base-256 // encoding. Unlike octal encoding, base-256 encoding does not require that the // string ends with a NUL character. Thus, all n bytes are available for output. // // If operating in binary mode, this assumes strict GNU binary mode; which means // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is // equivalent to the sign bit in two's complement form. func fitsInBase256(n int, x int64) bool { binBits := uint(n-1) * 8 return n >= 9 || (x >= -1< 0 && b[0]&0x80 != 0 { // Handling negative numbers relies on the following identity: // -a-1 == ^a // // If the number is negative, we use an inversion mask to invert the // data bytes and treat the value as an unsigned number. var inv byte // 0x00 if positive or zero, 0xff if negative if b[0]&0x40 != 0 { inv = 0xff } var x uint64 for i, c := range b { c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing if i == 0 { c &= 0x7f // Ignore signal bit in first byte } if (x >> 56) > 0 { p.err = ErrHeader // Integer overflow return 0 } x = x<<8 | uint64(c) } if (x >> 63) > 0 { p.err = ErrHeader // Integer overflow return 0 } if inv == 0xff { return ^int64(x) } return int64(x) } // Normal case is base-8 (octal) format. return p.parseOctal(b) } // formatNumeric encodes x into b using base-8 (octal) encoding if possible. // Otherwise it will attempt to use base-256 (binary) encoding. 
func (f *formatter) formatNumeric(b []byte, x int64) { if fitsInOctal(len(b), x) { f.formatOctal(b, x) return } if fitsInBase256(len(b), x) { for i := len(b) - 1; i >= 0; i-- { b[i] = byte(x) x >>= 8 } b[0] |= 0x80 // Highest bit indicates binary format return } f.formatOctal(b, 0) // Last resort, just write zero f.err = ErrFieldTooLong } func (p *parser) parseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. // So we remove leading and trailing NULs and spaces to // be sure. b = bytes.Trim(b, " \x00") if len(b) == 0 { return 0 } x, perr := strconv.ParseUint(p.parseString(b), 8, 64) if perr != nil { p.err = ErrHeader } return int64(x) } func (f *formatter) formatOctal(b []byte, x int64) { if !fitsInOctal(len(b), x) { x = 0 // Last resort, just write zero f.err = ErrFieldTooLong } s := strconv.FormatInt(x, 8) // Add leading zeros, but leave room for a NUL. if n := len(b) - len(s) - 1; n > 0 { s = strings.Repeat("0", n) + s } f.formatString(b, s) } // fitsInOctal reports whether the integer x fits in a field n-bytes long // using octal encoding with the appropriate NUL terminator. func fitsInOctal(n int, x int64) bool { octBits := uint(n-1) * 3 return x >= 0 && (n >= 22 || x < 1<= 0 { ss, sn = s[:pos], s[pos+1:] } // Parse the seconds. secs, err := strconv.ParseInt(ss, 10, 64) if err != nil { return time.Time{}, ErrHeader } if len(sn) == 0 { return time.Unix(secs, 0), nil // No sub-second values } // Parse the nanoseconds. 
if strings.Trim(sn, "0123456789") != "" { return time.Time{}, ErrHeader } if len(sn) < maxNanoSecondDigits { sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad } else { sn = sn[:maxNanoSecondDigits] // Right truncate } nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed if len(ss) > 0 && ss[0] == '-' { return time.Unix(secs, -1*nsecs), nil // Negative correction } return time.Unix(secs, nsecs), nil } // formatPAXTime converts ts into a time of the form %d.%d as described in the // PAX specification. This function is capable of negative timestamps. func formatPAXTime(ts time.Time) (s string) { secs, nsecs := ts.Unix(), ts.Nanosecond() if nsecs == 0 { return strconv.FormatInt(secs, 10) } // If seconds is negative, then perform correction. sign := "" if secs < 0 { sign = "-" // Remember sign secs = -(secs + 1) // Add a second to secs nsecs = -(nsecs - 1E9) // Take that second away from nsecs } return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") } // parsePAXRecord parses the input PAX record string into a key-value pair. // If parsing is successful, it will slice off the currently read record and // return the remainder as r. func parsePAXRecord(s string) (k, v, r string, err error) { // The size field ends at the first space. sp := strings.IndexByte(s, ' ') if sp == -1 { return "", "", s, ErrHeader } // Parse the first token as a decimal integer. n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int if perr != nil || n < 5 || int64(len(s)) < n { return "", "", s, ErrHeader } // Extract everything between the space and the final newline. rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] if nl != "\n" { return "", "", s, ErrHeader } // The first equals separates the key from the value. 
eq := strings.IndexByte(rec, '=') if eq == -1 { return "", "", s, ErrHeader } k, v = rec[:eq], rec[eq+1:] if !validPAXRecord(k, v) { return "", "", s, ErrHeader } return k, v, rem, nil } // formatPAXRecord formats a single PAX record, prefixing it with the // appropriate length. func formatPAXRecord(k, v string) (string, error) { if !validPAXRecord(k, v) { return "", ErrHeader } const padding = 3 // Extra padding for ' ', '=', and '\n' size := len(k) + len(v) + padding size += len(strconv.Itoa(size)) record := strconv.Itoa(size) + " " + k + "=" + v + "\n" // Final adjustment if adding size field increased the record size. if len(record) != size { size = len(record) record = strconv.Itoa(size) + " " + k + "=" + v + "\n" } return record, nil } // validPAXRecord reports whether the key-value pair is valid where each // record is formatted as: // "%d %s=%s\n" % (size, key, value) // // Keys and values should be UTF-8, but the number of bad writers out there // forces us to be a more liberal. // Thus, we only reject all keys with NUL, and only reject NULs in values // for the PAX version of the USTAR string fields. // The key must not contain an '=' character. func validPAXRecord(k, v string) bool { if k == "" || strings.IndexByte(k, '=') >= 0 { return false } switch k { case paxPath, paxLinkpath, paxUname, paxGname: return !hasNUL(v) default: return !hasNUL(k) } } vbatts-tar-split-6881021/archive/tar/strconv_test.go000066400000000000000000000327151467537433400224500ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package tar import ( "math" "strings" "testing" "time" ) func TestFitsInBase256(t *testing.T) { vectors := []struct { in int64 width int ok bool }{ {+1, 8, true}, {0, 8, true}, {-1, 8, true}, {1 << 56, 8, false}, {(1 << 56) - 1, 8, true}, {-1 << 56, 8, true}, {(-1 << 56) - 1, 8, false}, {121654, 8, true}, {-9849849, 8, true}, {math.MaxInt64, 9, true}, {0, 9, true}, {math.MinInt64, 9, true}, {math.MaxInt64, 12, true}, {0, 12, true}, {math.MinInt64, 12, true}, } for _, v := range vectors { ok := fitsInBase256(v.width, v.in) if ok != v.ok { t.Errorf("fitsInBase256(%d, %d): got %v, want %v", v.in, v.width, ok, v.ok) } } } func TestParseNumeric(t *testing.T) { vectors := []struct { in string want int64 ok bool }{ // Test base-256 (binary) encoded values. {"", 0, true}, {"\x80", 0, true}, {"\x80\x00", 0, true}, {"\x80\x00\x00", 0, true}, {"\xbf", (1 << 6) - 1, true}, {"\xbf\xff", (1 << 14) - 1, true}, {"\xbf\xff\xff", (1 << 22) - 1, true}, {"\xff", -1, true}, {"\xff\xff", -1, true}, {"\xff\xff\xff", -1, true}, {"\xc0", -1 * (1 << 6), true}, {"\xc0\x00", -1 * (1 << 14), true}, {"\xc0\x00\x00", -1 * (1 << 22), true}, {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, // Test base-8 (octal) encoded values. 
{"0000000\x00", 0, true}, {" \x0000000\x00", 0, true}, {" \x0000003\x00", 3, true}, {"00000000227\x00", 0227, true}, {"032033\x00 ", 032033, true}, {"320330\x00 ", 0320330, true}, {"0000660\x00 ", 0660, true}, {"\x00 0000660\x00 ", 0660, true}, {"0123456789abcdef", 0, false}, {"0123456789\x00abcdef", 0, false}, {"01234567\x0089abcdef", 342391, true}, {"0123\x7e\x5f\x264123", 0, false}, } for _, v := range vectors { var p parser got := p.parseNumeric([]byte(v.in)) ok := (p.err == nil) if ok != v.ok { if v.ok { t.Errorf("parseNumeric(%q): got parsing failure, want success", v.in) } else { t.Errorf("parseNumeric(%q): got parsing success, want failure", v.in) } } if ok && got != v.want { t.Errorf("parseNumeric(%q): got %d, want %d", v.in, got, v.want) } } } func TestFormatNumeric(t *testing.T) { vectors := []struct { in int64 want string ok bool }{ // Test base-8 (octal) encoded values. {0, "0\x00", true}, {7, "7\x00", true}, {8, "\x80\x08", true}, {077, "77\x00", true}, {0100, "\x80\x00\x40", true}, {0, "0000000\x00", true}, {0123, "0000123\x00", true}, {07654321, "7654321\x00", true}, {07777777, "7777777\x00", true}, {010000000, "\x80\x00\x00\x00\x00\x20\x00\x00", true}, {0, "00000000000\x00", true}, {000001234567, "00001234567\x00", true}, {076543210321, "76543210321\x00", true}, {012345670123, "12345670123\x00", true}, {077777777777, "77777777777\x00", true}, {0100000000000, "\x80\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", true}, {math.MaxInt64, "777777777777777777777\x00", true}, // Test base-256 (binary) encoded values. 
{-1, "\xff", true}, {-1, "\xff\xff", true}, {-1, "\xff\xff\xff", true}, {(1 << 0), "0", false}, {(1 << 8) - 1, "\x80\xff", true}, {(1 << 8), "0\x00", false}, {(1 << 16) - 1, "\x80\xff\xff", true}, {(1 << 16), "00\x00", false}, {-1 * (1 << 0), "\xff", true}, {-1*(1<<0) - 1, "0", false}, {-1 * (1 << 8), "\xff\x00", true}, {-1*(1<<8) - 1, "0\x00", false}, {-1 * (1 << 16), "\xff\x00\x00", true}, {-1*(1<<16) - 1, "00\x00", false}, {537795476381659745, "0000000\x00", false}, {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, {-615126028225187231, "0000000\x00", false}, {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, {math.MaxInt64, "0000000\x00", false}, {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, {math.MinInt64, "0000000\x00", false}, {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, } for _, v := range vectors { var f formatter got := make([]byte, len(v.want)) f.formatNumeric(got, v.in) ok := (f.err == nil) if ok != v.ok { if v.ok { t.Errorf("formatNumeric(%d): got formatting failure, want success", v.in) } else { t.Errorf("formatNumeric(%d): got formatting success, want failure", v.in) } } if string(got) != v.want { t.Errorf("formatNumeric(%d): got %q, want %q", v.in, got, v.want) } } } func TestFitsInOctal(t *testing.T) { vectors := []struct { input int64 width int ok bool }{ {-1, 1, false}, {-1, 2, false}, {-1, 3, false}, {0, 1, true}, {0 + 1, 1, false}, {0, 2, true}, {07, 2, true}, {07 + 1, 2, false}, {0, 4, true}, {0777, 4, true}, {0777 + 1, 4, false}, {0, 8, true}, {07777777, 8, true}, {07777777 + 1, 8, false}, {0, 12, true}, {077777777777, 12, true}, {077777777777 + 1, 12, false}, {math.MaxInt64, 22, true}, {012345670123, 12, true}, {01564164, 12, true}, {-012345670123, 12, false}, {-01564164, 12, false}, {-1564164, 30, 
false}, } for _, v := range vectors { ok := fitsInOctal(v.width, v.input) if ok != v.ok { t.Errorf("checkOctal(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) } } } func TestParsePAXTime(t *testing.T) { vectors := []struct { in string want time.Time ok bool }{ {"1350244992.023960108", time.Unix(1350244992, 23960108), true}, {"1350244992.02396010", time.Unix(1350244992, 23960100), true}, {"1350244992.0239601089", time.Unix(1350244992, 23960108), true}, {"1350244992.3", time.Unix(1350244992, 300000000), true}, {"1350244992", time.Unix(1350244992, 0), true}, {"-1.000000001", time.Unix(-1, -1e0+0e0), true}, {"-1.000001", time.Unix(-1, -1e3+0e0), true}, {"-1.001000", time.Unix(-1, -1e6+0e0), true}, {"-1", time.Unix(-1, -0e0+0e0), true}, {"-1.999000", time.Unix(-1, -1e9+1e6), true}, {"-1.999999", time.Unix(-1, -1e9+1e3), true}, {"-1.999999999", time.Unix(-1, -1e9+1e0), true}, {"0.000000001", time.Unix(0, 1e0+0e0), true}, {"0.000001", time.Unix(0, 1e3+0e0), true}, {"0.001000", time.Unix(0, 1e6+0e0), true}, {"0", time.Unix(0, 0e0), true}, {"0.999000", time.Unix(0, 1e9-1e6), true}, {"0.999999", time.Unix(0, 1e9-1e3), true}, {"0.999999999", time.Unix(0, 1e9-1e0), true}, {"1.000000001", time.Unix(+1, +1e0-0e0), true}, {"1.000001", time.Unix(+1, +1e3-0e0), true}, {"1.001000", time.Unix(+1, +1e6-0e0), true}, {"1", time.Unix(+1, +0e0-0e0), true}, {"1.999000", time.Unix(+1, +1e9-1e6), true}, {"1.999999", time.Unix(+1, +1e9-1e3), true}, {"1.999999999", time.Unix(+1, +1e9-1e0), true}, {"-1350244992.023960108", time.Unix(-1350244992, -23960108), true}, {"-1350244992.02396010", time.Unix(-1350244992, -23960100), true}, {"-1350244992.0239601089", time.Unix(-1350244992, -23960108), true}, {"-1350244992.3", time.Unix(-1350244992, -300000000), true}, {"-1350244992", time.Unix(-1350244992, 0), true}, {"", time.Time{}, false}, {"0", time.Unix(0, 0), true}, {"1.", time.Unix(1, 0), true}, {"0.0", time.Unix(0, 0), true}, {".5", time.Time{}, false}, {"-1.3", time.Unix(-1, -3e8), true}, 
{"-1.0", time.Unix(-1, -0e0), true}, {"-0.0", time.Unix(-0, -0e0), true}, {"-0.1", time.Unix(-0, -1e8), true}, {"-0.01", time.Unix(-0, -1e7), true}, {"-0.99", time.Unix(-0, -99e7), true}, {"-0.98", time.Unix(-0, -98e7), true}, {"-1.1", time.Unix(-1, -1e8), true}, {"-1.01", time.Unix(-1, -1e7), true}, {"-2.99", time.Unix(-2, -99e7), true}, {"-5.98", time.Unix(-5, -98e7), true}, {"-", time.Time{}, false}, {"+", time.Time{}, false}, {"-1.-1", time.Time{}, false}, {"99999999999999999999999999999999999999999999999", time.Time{}, false}, {"0.123456789abcdef", time.Time{}, false}, {"foo", time.Time{}, false}, {"\x00", time.Time{}, false}, {"𝟵𝟴𝟳𝟲𝟱.𝟰𝟯𝟮𝟭𝟬", time.Time{}, false}, // Unicode numbers (U+1D7EC to U+1D7F5) {"98765﹒43210", time.Time{}, false}, // Unicode period (U+FE52) } for _, v := range vectors { ts, err := parsePAXTime(v.in) ok := (err == nil) if v.ok != ok { if v.ok { t.Errorf("parsePAXTime(%q): got parsing failure, want success", v.in) } else { t.Errorf("parsePAXTime(%q): got parsing success, want failure", v.in) } } if ok && !ts.Equal(v.want) { t.Errorf("parsePAXTime(%q): got (%ds %dns), want (%ds %dns)", v.in, ts.Unix(), ts.Nanosecond(), v.want.Unix(), v.want.Nanosecond()) } } } func TestFormatPAXTime(t *testing.T) { vectors := []struct { sec, nsec int64 want string }{ {1350244992, 0, "1350244992"}, {1350244992, 300000000, "1350244992.3"}, {1350244992, 23960100, "1350244992.0239601"}, {1350244992, 23960108, "1350244992.023960108"}, {+1, +1E9 - 1E0, "1.999999999"}, {+1, +1E9 - 1E3, "1.999999"}, {+1, +1E9 - 1E6, "1.999"}, {+1, +0E0 - 0E0, "1"}, {+1, +1E6 - 0E0, "1.001"}, {+1, +1E3 - 0E0, "1.000001"}, {+1, +1E0 - 0E0, "1.000000001"}, {0, 1E9 - 1E0, "0.999999999"}, {0, 1E9 - 1E3, "0.999999"}, {0, 1E9 - 1E6, "0.999"}, {0, 0E0, "0"}, {0, 1E6 + 0E0, "0.001"}, {0, 1E3 + 0E0, "0.000001"}, {0, 1E0 + 0E0, "0.000000001"}, {-1, -1E9 + 1E0, "-1.999999999"}, {-1, -1E9 + 1E3, "-1.999999"}, {-1, -1E9 + 1E6, "-1.999"}, {-1, -0E0 + 0E0, "-1"}, {-1, -1E6 + 0E0, "-1.001"}, {-1, 
-1E3 + 0E0, "-1.000001"}, {-1, -1E0 + 0E0, "-1.000000001"}, {-1350244992, 0, "-1350244992"}, {-1350244992, -300000000, "-1350244992.3"}, {-1350244992, -23960100, "-1350244992.0239601"}, {-1350244992, -23960108, "-1350244992.023960108"}, } for _, v := range vectors { got := formatPAXTime(time.Unix(v.sec, v.nsec)) if got != v.want { t.Errorf("formatPAXTime(%ds, %dns): got %q, want %q", v.sec, v.nsec, got, v.want) } } } func TestParsePAXRecord(t *testing.T) { medName := strings.Repeat("CD", 50) longName := strings.Repeat("AB", 100) vectors := []struct { in string wantRes string wantKey string wantVal string ok bool }{ {"6 k=v\n\n", "\n", "k", "v", true}, {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, {"110 path=" + medName + "\n", "", "path", medName, true}, {"9 foo=ba\n", "", "foo", "ba", true}, {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, {"17 \x00hello=\x00world\n", "17 \x00hello=\x00world\n", "", "", false}, {"1 k=1\n", "1 k=1\n", "", "", false}, {"6 k~1\n", "6 k~1\n", "", "", false}, {"6_k=1\n", "6_k=1\n", "", "", false}, {"6 k=1 ", "6 k=1 ", "", "", false}, {"632 k=1\n", "632 k=1\n", "", "", false}, {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, } for _, v := range vectors { key, val, res, err := parsePAXRecord(v.in) ok := (err == nil) if ok != v.ok { if v.ok { t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.in) } else { t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.in) } } if v.ok && (key != v.wantKey || val != v.wantVal) { t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", v.in, key, val, 
v.wantKey, v.wantVal) } if res != v.wantRes { t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", v.in, res, v.wantRes) } } } func TestFormatPAXRecord(t *testing.T) { medName := strings.Repeat("CD", 50) longName := strings.Repeat("AB", 100) vectors := []struct { inKey string inVal string want string ok bool }{ {"k", "v", "6 k=v\n", true}, {"path", "/etc/hosts", "19 path=/etc/hosts\n", true}, {"path", longName, "210 path=" + longName + "\n", true}, {"path", medName, "110 path=" + medName + "\n", true}, {"foo", "ba", "9 foo=ba\n", true}, {"foo", "bar", "11 foo=bar\n", true}, {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n", true}, {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n", true}, {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n", true}, {"xhello", "\x00world", "17 xhello=\x00world\n", true}, {"path", "null\x00", "", false}, {"null\x00", "value", "", false}, {paxSchilyXattr + "key", "null\x00", "26 SCHILY.xattr.key=null\x00\n", true}, } for _, v := range vectors { got, err := formatPAXRecord(v.inKey, v.inVal) ok := (err == nil) if ok != v.ok { if v.ok { t.Errorf("formatPAXRecord(%q, %q): got format failure, want success", v.inKey, v.inVal) } else { t.Errorf("formatPAXRecord(%q, %q): got format success, want failure", v.inKey, v.inVal) } } if got != v.want { t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", v.inKey, v.inVal, got, v.want) } } } vbatts-tar-split-6881021/archive/tar/tar_test.go000066400000000000000000000564121467537433400215400ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package tar import ( "bytes" "errors" "fmt" "io" "math" "os" "path" "path/filepath" "reflect" "runtime" "strings" "testing" "time" ) type testError struct{ error } type fileOps []interface{} // []T where T is (string | int64) // testFile is an io.ReadWriteSeeker where the IO operations performed // on it must match the list of operations in ops. type testFile struct { ops fileOps pos int64 } func (f *testFile) Read(b []byte) (int, error) { if len(b) == 0 { return 0, nil } if len(f.ops) == 0 { return 0, io.EOF } s, ok := f.ops[0].(string) if !ok { return 0, errors.New("unexpected Read operation") } n := copy(b, s) if len(s) > n { f.ops[0] = s[n:] } else { f.ops = f.ops[1:] } f.pos += int64(len(b)) return n, nil } func (f *testFile) Write(b []byte) (int, error) { if len(b) == 0 { return 0, nil } if len(f.ops) == 0 { return 0, errors.New("unexpected Write operation") } s, ok := f.ops[0].(string) if !ok { return 0, errors.New("unexpected Write operation") } if !strings.HasPrefix(s, string(b)) { return 0, testError{fmt.Errorf("got Write(%q), want Write(%q)", b, s)} } if len(s) > len(b) { f.ops[0] = s[len(b):] } else { f.ops = f.ops[1:] } f.pos += int64(len(b)) return len(b), nil } func (f *testFile) Seek(pos int64, whence int) (int64, error) { if pos == 0 && whence == io.SeekCurrent { return f.pos, nil } if len(f.ops) == 0 { return 0, errors.New("unexpected Seek operation") } s, ok := f.ops[0].(int64) if !ok { return 0, errors.New("unexpected Seek operation") } if s != pos || whence != io.SeekCurrent { return 0, testError{fmt.Errorf("got Seek(%d, %d), want Seek(%d, %d)", pos, whence, s, io.SeekCurrent)} } f.pos += s f.ops = f.ops[1:] return f.pos, nil } func equalSparseEntries(x, y []sparseEntry) bool { return (len(x) == 0 && len(y) == 0) || reflect.DeepEqual(x, y) } func TestSparseEntries(t *testing.T) { vectors := []struct { in []sparseEntry size int64 wantValid bool // Result of validateSparseEntries wantAligned []sparseEntry // Result of alignSparseEntries 
wantInverted []sparseEntry // Result of invertSparseEntries }{{ in: []sparseEntry{}, size: 0, wantValid: true, wantInverted: []sparseEntry{{0, 0}}, }, { in: []sparseEntry{}, size: 5000, wantValid: true, wantInverted: []sparseEntry{{0, 5000}}, }, { in: []sparseEntry{{0, 5000}}, size: 5000, wantValid: true, wantAligned: []sparseEntry{{0, 5000}}, wantInverted: []sparseEntry{{5000, 0}}, }, { in: []sparseEntry{{1000, 4000}}, size: 5000, wantValid: true, wantAligned: []sparseEntry{{1024, 3976}}, wantInverted: []sparseEntry{{0, 1000}, {5000, 0}}, }, { in: []sparseEntry{{0, 3000}}, size: 5000, wantValid: true, wantAligned: []sparseEntry{{0, 2560}}, wantInverted: []sparseEntry{{3000, 2000}}, }, { in: []sparseEntry{{3000, 2000}}, size: 5000, wantValid: true, wantAligned: []sparseEntry{{3072, 1928}}, wantInverted: []sparseEntry{{0, 3000}, {5000, 0}}, }, { in: []sparseEntry{{2000, 2000}}, size: 5000, wantValid: true, wantAligned: []sparseEntry{{2048, 1536}}, wantInverted: []sparseEntry{{0, 2000}, {4000, 1000}}, }, { in: []sparseEntry{{0, 2000}, {8000, 2000}}, size: 10000, wantValid: true, wantAligned: []sparseEntry{{0, 1536}, {8192, 1808}}, wantInverted: []sparseEntry{{2000, 6000}, {10000, 0}}, }, { in: []sparseEntry{{0, 2000}, {2000, 2000}, {4000, 0}, {4000, 3000}, {7000, 1000}, {8000, 0}, {8000, 2000}}, size: 10000, wantValid: true, wantAligned: []sparseEntry{{0, 1536}, {2048, 1536}, {4096, 2560}, {7168, 512}, {8192, 1808}}, wantInverted: []sparseEntry{{10000, 0}}, }, { in: []sparseEntry{{0, 0}, {1000, 0}, {2000, 0}, {3000, 0}, {4000, 0}, {5000, 0}}, size: 5000, wantValid: true, wantInverted: []sparseEntry{{0, 5000}}, }, { in: []sparseEntry{{1, 0}}, size: 0, wantValid: false, }, { in: []sparseEntry{{-1, 0}}, size: 100, wantValid: false, }, { in: []sparseEntry{{0, -1}}, size: 100, wantValid: false, }, { in: []sparseEntry{{0, 0}}, size: -100, wantValid: false, }, { in: []sparseEntry{{math.MaxInt64, 3}, {6, -5}}, size: 35, wantValid: false, }, { in: []sparseEntry{{1, 3}, {6, 
-5}}, size: 35, wantValid: false, }, { in: []sparseEntry{{math.MaxInt64, math.MaxInt64}}, size: math.MaxInt64, wantValid: false, }, { in: []sparseEntry{{3, 3}}, size: 5, wantValid: false, }, { in: []sparseEntry{{2, 0}, {1, 0}, {0, 0}}, size: 3, wantValid: false, }, { in: []sparseEntry{{1, 3}, {2, 2}}, size: 10, wantValid: false, }} for i, v := range vectors { gotValid := validateSparseEntries(v.in, v.size) if gotValid != v.wantValid { t.Errorf("test %d, validateSparseEntries() = %v, want %v", i, gotValid, v.wantValid) } if !v.wantValid { continue } gotAligned := alignSparseEntries(append([]sparseEntry{}, v.in...), v.size) if !equalSparseEntries(gotAligned, v.wantAligned) { t.Errorf("test %d, alignSparseEntries():\ngot %v\nwant %v", i, gotAligned, v.wantAligned) } gotInverted := invertSparseEntries(append([]sparseEntry{}, v.in...), v.size) if !equalSparseEntries(gotInverted, v.wantInverted) { t.Errorf("test %d, inverseSparseEntries():\ngot %v\nwant %v", i, gotInverted, v.wantInverted) } } } func TestFileInfoHeader(t *testing.T) { fi, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } h, err := FileInfoHeader(fi, "") if err != nil { t.Fatalf("FileInfoHeader: %v", err) } if g, e := h.Name, "small.txt"; g != e { t.Errorf("Name = %q; want %q", g, e) } if g, e := h.Mode, int64(fi.Mode().Perm()); g != e { t.Errorf("Mode = %#o; want %#o", g, e) } if g, e := h.Size, int64(5); g != e { t.Errorf("Size = %v; want %v", g, e) } if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { t.Errorf("ModTime = %v; want %v", g, e) } // FileInfoHeader should error when passing nil FileInfo if _, err := FileInfoHeader(nil, ""); err == nil { t.Fatalf("Expected error when passing nil to FileInfoHeader") } } func TestFileInfoHeaderDir(t *testing.T) { fi, err := os.Stat("testdata") if err != nil { t.Fatal(err) } h, err := FileInfoHeader(fi, "") if err != nil { t.Fatalf("FileInfoHeader: %v", err) } if g, e := h.Name, "testdata/"; g != e { t.Errorf("Name = %q; want %q", g, e) } // 
Ignoring c_ISGID for golang.org/issue/4867 if g, e := h.Mode&^c_ISGID, int64(fi.Mode().Perm()); g != e { t.Errorf("Mode = %#o; want %#o", g, e) } if g, e := h.Size, int64(0); g != e { t.Errorf("Size = %v; want %v", g, e) } if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { t.Errorf("ModTime = %v; want %v", g, e) } } func TestFileInfoHeaderSymlink(t *testing.T) { switch runtime.GOOS { case "android", "nacl", "plan9", "windows": t.Skip("symlinks not supported") } tmpdir, err := os.MkdirTemp("", "TestFileInfoHeaderSymlink") if err != nil { t.Fatal(err) } defer os.RemoveAll(tmpdir) link := filepath.Join(tmpdir, "link") target := tmpdir err = os.Symlink(target, link) if err != nil { t.Fatal(err) } fi, err := os.Lstat(link) if err != nil { t.Fatal(err) } h, err := FileInfoHeader(fi, target) if err != nil { t.Fatal(err) } if g, e := h.Name, fi.Name(); g != e { t.Errorf("Name = %q; want %q", g, e) } if g, e := h.Linkname, target; g != e { t.Errorf("Linkname = %q; want %q", g, e) } if g, e := h.Typeflag, byte(TypeSymlink); g != e { t.Errorf("Typeflag = %v; want %v", g, e) } } func TestRoundTrip(t *testing.T) { data := []byte("some file contents") var b bytes.Buffer tw := NewWriter(&b) hdr := &Header{ Name: "file.txt", Uid: 1 << 21, // Too big for 8 octal digits Size: int64(len(data)), ModTime: time.Now().Round(time.Second), PAXRecords: map[string]string{"uid": "2097152"}, Format: FormatPAX, Typeflag: TypeReg, } if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("tw.WriteHeader: %v", err) } if _, err := tw.Write(data); err != nil { t.Fatalf("tw.Write: %v", err) } if err := tw.Close(); err != nil { t.Fatalf("tw.Close: %v", err) } // Read it back. 
tr := NewReader(&b) rHdr, err := tr.Next() if err != nil { t.Fatalf("tr.Next: %v", err) } if !reflect.DeepEqual(rHdr, hdr) { t.Errorf("Header mismatch.\n got %+v\nwant %+v", rHdr, hdr) } rData, err := io.ReadAll(tr) if err != nil { t.Fatalf("Read: %v", err) } if !bytes.Equal(rData, data) { t.Errorf("Data mismatch.\n got %q\nwant %q", rData, data) } } type headerRoundTripTest struct { h *Header fm os.FileMode } func TestHeaderRoundTrip(t *testing.T) { vectors := []headerRoundTripTest{{ // regular file. h: &Header{ Name: "test.txt", Mode: 0644, Size: 12, ModTime: time.Unix(1360600916, 0), Typeflag: TypeReg, }, fm: 0644, }, { // symbolic link. h: &Header{ Name: "link.txt", Mode: 0777, Size: 0, ModTime: time.Unix(1360600852, 0), Typeflag: TypeSymlink, }, fm: 0777 | os.ModeSymlink, }, { // character device node. h: &Header{ Name: "dev/null", Mode: 0666, Size: 0, ModTime: time.Unix(1360578951, 0), Typeflag: TypeChar, }, fm: 0666 | os.ModeDevice | os.ModeCharDevice, }, { // block device node. h: &Header{ Name: "dev/sda", Mode: 0660, Size: 0, ModTime: time.Unix(1360578954, 0), Typeflag: TypeBlock, }, fm: 0660 | os.ModeDevice, }, { // directory. h: &Header{ Name: "dir/", Mode: 0755, Size: 0, ModTime: time.Unix(1360601116, 0), Typeflag: TypeDir, }, fm: 0755 | os.ModeDir, }, { // fifo node. h: &Header{ Name: "dev/initctl", Mode: 0600, Size: 0, ModTime: time.Unix(1360578949, 0), Typeflag: TypeFifo, }, fm: 0600 | os.ModeNamedPipe, }, { // setuid. h: &Header{ Name: "bin/su", Mode: 0755 | c_ISUID, Size: 23232, ModTime: time.Unix(1355405093, 0), Typeflag: TypeReg, }, fm: 0755 | os.ModeSetuid, }, { // setguid. h: &Header{ Name: "group.txt", Mode: 0750 | c_ISGID, Size: 0, ModTime: time.Unix(1360602346, 0), Typeflag: TypeReg, }, fm: 0750 | os.ModeSetgid, }, { // sticky. h: &Header{ Name: "sticky.txt", Mode: 0600 | c_ISVTX, Size: 7, ModTime: time.Unix(1360602540, 0), Typeflag: TypeReg, }, fm: 0600 | os.ModeSticky, }, { // hard link. 
h: &Header{ Name: "hard.txt", Mode: 0644, Size: 0, Linkname: "file.txt", ModTime: time.Unix(1360600916, 0), Typeflag: TypeLink, }, fm: 0644, }, { // More information. h: &Header{ Name: "info.txt", Mode: 0600, Size: 0, Uid: 1000, Gid: 1000, ModTime: time.Unix(1360602540, 0), Uname: "slartibartfast", Gname: "users", Typeflag: TypeReg, }, fm: 0600, }} for i, v := range vectors { fi := v.h.FileInfo() h2, err := FileInfoHeader(fi, "") if err != nil { t.Error(err) continue } if strings.Contains(fi.Name(), "/") { t.Errorf("FileInfo of %q contains slash: %q", v.h.Name, fi.Name()) } name := path.Base(v.h.Name) if fi.IsDir() { name += "/" } if got, want := h2.Name, name; got != want { t.Errorf("i=%d: Name: got %v, want %v", i, got, want) } if got, want := h2.Size, v.h.Size; got != want { t.Errorf("i=%d: Size: got %v, want %v", i, got, want) } if got, want := h2.Uid, v.h.Uid; got != want { t.Errorf("i=%d: Uid: got %d, want %d", i, got, want) } if got, want := h2.Gid, v.h.Gid; got != want { t.Errorf("i=%d: Gid: got %d, want %d", i, got, want) } if got, want := h2.Uname, v.h.Uname; got != want { t.Errorf("i=%d: Uname: got %q, want %q", i, got, want) } if got, want := h2.Gname, v.h.Gname; got != want { t.Errorf("i=%d: Gname: got %q, want %q", i, got, want) } if got, want := h2.Linkname, v.h.Linkname; got != want { t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want) } if got, want := h2.Typeflag, v.h.Typeflag; got != want { t.Logf("%#v %#v", v.h, fi.Sys()) t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want) } if got, want := h2.Mode, v.h.Mode; got != want { t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) } if got, want := fi.Mode(), v.fm; got != want { t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) } if got, want := h2.AccessTime, v.h.AccessTime; got != want { t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want) } if got, want := h2.ChangeTime, v.h.ChangeTime; got != want { t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want) } if got, 
want := h2.ModTime, v.h.ModTime; got != want { t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) } if sysh, ok := fi.Sys().(*Header); !ok || sysh != v.h { t.Errorf("i=%d: Sys didn't return original *Header", i) } } } func TestHeaderAllowedFormats(t *testing.T) { vectors := []struct { header *Header // Input header paxHdrs map[string]string // Expected PAX headers that may be needed formats Format // Expected formats that can encode the header }{{ header: &Header{}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Size: 077777777777}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Size: 077777777777, Format: FormatUSTAR}, formats: FormatUSTAR, }, { header: &Header{Size: 077777777777, Format: FormatPAX}, formats: FormatUSTAR | FormatPAX, }, { header: &Header{Size: 077777777777, Format: FormatGNU}, formats: FormatGNU, }, { header: &Header{Size: 077777777777 + 1}, paxHdrs: map[string]string{paxSize: "8589934592"}, formats: FormatPAX | FormatGNU, }, { header: &Header{Size: 077777777777 + 1, Format: FormatPAX}, paxHdrs: map[string]string{paxSize: "8589934592"}, formats: FormatPAX, }, { header: &Header{Size: 077777777777 + 1, Format: FormatGNU}, paxHdrs: map[string]string{paxSize: "8589934592"}, formats: FormatGNU, }, { header: &Header{Mode: 07777777}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Mode: 07777777 + 1}, formats: FormatGNU, }, { header: &Header{Devmajor: -123}, formats: FormatGNU, }, { header: &Header{Devmajor: 1<<56 - 1}, formats: FormatGNU, }, { header: &Header{Devmajor: 1 << 56}, formats: FormatUnknown, }, { header: &Header{Devmajor: -1 << 56}, formats: FormatGNU, }, { header: &Header{Devmajor: -1<<56 - 1}, formats: FormatUnknown, }, { header: &Header{Name: "用戶名", Devmajor: -1 << 56}, formats: FormatGNU, }, { header: &Header{Size: math.MaxInt64}, paxHdrs: map[string]string{paxSize: "9223372036854775807"}, formats: FormatPAX | FormatGNU, }, { header: &Header{Size: math.MinInt64}, paxHdrs: 
map[string]string{paxSize: "-9223372036854775808"}, formats: FormatUnknown, }, { header: &Header{Uname: "0123456789abcdef0123456789abcdef"}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Uname: "0123456789abcdef0123456789abcdefx"}, paxHdrs: map[string]string{paxUname: "0123456789abcdef0123456789abcdefx"}, formats: FormatPAX, }, { header: &Header{Name: "foobar"}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Name: strings.Repeat("a", nameSize)}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Name: strings.Repeat("a", nameSize+1)}, paxHdrs: map[string]string{paxPath: strings.Repeat("a", nameSize+1)}, formats: FormatPAX | FormatGNU, }, { header: &Header{Linkname: "用戶名"}, paxHdrs: map[string]string{paxLinkpath: "用戶名"}, formats: FormatPAX | FormatGNU, }, { header: &Header{Linkname: strings.Repeat("用戶名\x00", nameSize)}, paxHdrs: map[string]string{paxLinkpath: strings.Repeat("用戶名\x00", nameSize)}, formats: FormatUnknown, }, { header: &Header{Linkname: "\x00hello"}, paxHdrs: map[string]string{paxLinkpath: "\x00hello"}, formats: FormatUnknown, }, { header: &Header{Uid: 07777777}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Uid: 07777777 + 1}, paxHdrs: map[string]string{paxUid: "2097152"}, formats: FormatPAX | FormatGNU, }, { header: &Header{Xattrs: nil}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Xattrs: map[string]string{"foo": "bar"}}, paxHdrs: map[string]string{paxSchilyXattr + "foo": "bar"}, formats: FormatPAX, }, { header: &Header{Xattrs: map[string]string{"foo": "bar"}, Format: FormatGNU}, paxHdrs: map[string]string{paxSchilyXattr + "foo": "bar"}, formats: FormatUnknown, }, { header: &Header{Xattrs: map[string]string{"用戶名": "\x00hello"}}, paxHdrs: map[string]string{paxSchilyXattr + "用戶名": "\x00hello"}, formats: FormatPAX, }, { header: &Header{Xattrs: map[string]string{"foo=bar": "baz"}}, formats: FormatUnknown, }, { header: &Header{Xattrs: 
map[string]string{"foo": ""}}, paxHdrs: map[string]string{paxSchilyXattr + "foo": ""}, formats: FormatPAX, }, { header: &Header{ModTime: time.Unix(0, 0)}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{ModTime: time.Unix(077777777777, 0)}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{ModTime: time.Unix(077777777777+1, 0)}, paxHdrs: map[string]string{paxMtime: "8589934592"}, formats: FormatPAX | FormatGNU, }, { header: &Header{ModTime: time.Unix(math.MaxInt64, 0)}, paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, formats: FormatPAX | FormatGNU, }, { header: &Header{ModTime: time.Unix(math.MaxInt64, 0), Format: FormatUSTAR}, paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, formats: FormatUnknown, }, { header: &Header{ModTime: time.Unix(-1, 0)}, paxHdrs: map[string]string{paxMtime: "-1"}, formats: FormatPAX | FormatGNU, }, { header: &Header{ModTime: time.Unix(1, 500)}, paxHdrs: map[string]string{paxMtime: "1.0000005"}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{ModTime: time.Unix(1, 0)}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{ModTime: time.Unix(1, 0), Format: FormatPAX}, formats: FormatUSTAR | FormatPAX, }, { header: &Header{ModTime: time.Unix(1, 500), Format: FormatUSTAR}, paxHdrs: map[string]string{paxMtime: "1.0000005"}, formats: FormatUSTAR, }, { header: &Header{ModTime: time.Unix(1, 500), Format: FormatPAX}, paxHdrs: map[string]string{paxMtime: "1.0000005"}, formats: FormatPAX, }, { header: &Header{ModTime: time.Unix(1, 500), Format: FormatGNU}, paxHdrs: map[string]string{paxMtime: "1.0000005"}, formats: FormatGNU, }, { header: &Header{ModTime: time.Unix(-1, 500)}, paxHdrs: map[string]string{paxMtime: "-0.9999995"}, formats: FormatPAX | FormatGNU, }, { header: &Header{ModTime: time.Unix(-1, 500), Format: FormatGNU}, paxHdrs: map[string]string{paxMtime: "-0.9999995"}, formats: FormatGNU, }, { header: &Header{AccessTime: time.Unix(0, 0)}, paxHdrs: 
map[string]string{paxAtime: "0"}, formats: FormatPAX | FormatGNU, }, { header: &Header{AccessTime: time.Unix(0, 0), Format: FormatUSTAR}, paxHdrs: map[string]string{paxAtime: "0"}, formats: FormatUnknown, }, { header: &Header{AccessTime: time.Unix(0, 0), Format: FormatPAX}, paxHdrs: map[string]string{paxAtime: "0"}, formats: FormatPAX, }, { header: &Header{AccessTime: time.Unix(0, 0), Format: FormatGNU}, paxHdrs: map[string]string{paxAtime: "0"}, formats: FormatGNU, }, { header: &Header{AccessTime: time.Unix(-123, 0)}, paxHdrs: map[string]string{paxAtime: "-123"}, formats: FormatPAX | FormatGNU, }, { header: &Header{AccessTime: time.Unix(-123, 0), Format: FormatPAX}, paxHdrs: map[string]string{paxAtime: "-123"}, formats: FormatPAX, }, { header: &Header{ChangeTime: time.Unix(123, 456)}, paxHdrs: map[string]string{paxCtime: "123.000000456"}, formats: FormatPAX | FormatGNU, }, { header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatUSTAR}, paxHdrs: map[string]string{paxCtime: "123.000000456"}, formats: FormatUnknown, }, { header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU}, paxHdrs: map[string]string{paxCtime: "123.000000456"}, formats: FormatGNU, }, { header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatPAX}, paxHdrs: map[string]string{paxCtime: "123.000000456"}, formats: FormatPAX, }, { header: &Header{Name: "foo/", Typeflag: TypeDir}, formats: FormatUSTAR | FormatPAX | FormatGNU, }, { header: &Header{Name: "foo/", Typeflag: TypeReg}, formats: FormatUnknown, }, { header: &Header{Name: "foo/", Typeflag: TypeSymlink}, formats: FormatUSTAR | FormatPAX | FormatGNU, }} for i, v := range vectors { formats, paxHdrs, err := v.header.allowedFormats() if formats != v.formats { t.Errorf("test %d, allowedFormats(): got %v, want %v", i, formats, v.formats) } if formats&FormatPAX > 0 && !reflect.DeepEqual(paxHdrs, v.paxHdrs) && !(len(paxHdrs) == 0 && len(v.paxHdrs) == 0) { t.Errorf("test %d, allowedFormats():\ngot %v\nwant %s", i, paxHdrs, 
v.paxHdrs) } if (formats != FormatUnknown) && (err != nil) { t.Errorf("test %d, unexpected error: %v", i, err) } if (formats == FormatUnknown) && (err == nil) { t.Errorf("test %d, got nil-error, want non-nil error", i) } } } func Benchmark(b *testing.B) { type file struct { hdr *Header body []byte } vectors := []struct { label string files []file }{{ "USTAR", []file{{ &Header{Name: "bar", Mode: 0640, Size: int64(3)}, []byte("foo"), }, { &Header{Name: "world", Mode: 0640, Size: int64(5)}, []byte("hello"), }}, }, { "GNU", []file{{ &Header{Name: "bar", Mode: 0640, Size: int64(3), Devmajor: -1}, []byte("foo"), }, { &Header{Name: "world", Mode: 0640, Size: int64(5), Devmajor: -1}, []byte("hello"), }}, }, { "PAX", []file{{ &Header{Name: "bar", Mode: 0640, Size: int64(3), Xattrs: map[string]string{"foo": "bar"}}, []byte("foo"), }, { &Header{Name: "world", Mode: 0640, Size: int64(5), Xattrs: map[string]string{"foo": "bar"}}, []byte("hello"), }}, }} b.Run("Writer", func(b *testing.B) { for _, v := range vectors { b.Run(v.label, func(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { // Writing to io.Discard because we want to // test purely the writer code and not bring in disk performance into this. tw := NewWriter(io.Discard) for _, file := range v.files { if err := tw.WriteHeader(file.hdr); err != nil { b.Errorf("unexpected WriteHeader error: %v", err) } if _, err := tw.Write(file.body); err != nil { b.Errorf("unexpected Write error: %v", err) } } if err := tw.Close(); err != nil { b.Errorf("unexpected Close error: %v", err) } } }) } }) b.Run("Reader", func(b *testing.B) { for _, v := range vectors { var buf bytes.Buffer var r bytes.Reader // Write the archive to a byte buffer. tw := NewWriter(&buf) for _, file := range v.files { _ = tw.WriteHeader(file.hdr) _, _ = tw.Write(file.body) } tw.Close() b.Run(v.label, func(b *testing.B) { b.ReportAllocs() // Read from the byte buffer. 
for i := 0; i < b.N; i++ { r.Reset(buf.Bytes()) tr := NewReader(&r) if _, err := tr.Next(); err != nil { b.Errorf("unexpected Next error: %v", err) } if _, err := io.Copy(io.Discard, tr); err != nil { b.Errorf("unexpected Copy error : %v", err) } } }) } }) } vbatts-tar-split-6881021/archive/tar/testdata/000077500000000000000000000000001467537433400211655ustar00rootroot00000000000000vbatts-tar-split-6881021/archive/tar/testdata/file-and-dir.tar000066400000000000000000000050001467537433400241230ustar00rootroot00000000000000small.txt0000000000000000000000000000000500000000000011033 0ustar0000000000000000Kiltsdir/0000000000000000000000000000000000000000000007742 5ustar0000000000000000vbatts-tar-split-6881021/archive/tar/testdata/gnu-incremental.tar000066400000000000000000000050001467537433400247600ustar00rootroot00000000000000test2/0040755000175000017500000000001612574542263013224 Dustar rawrdsnet1257454434512574542274YfooYsparsetest2/foo0100644000175000017500000000010012574542163013667 0ustar rawrdsnet1257454434512574542274fewafewa fewa feawfehahaha hahaafwe hahafawe hahawafe a fwefewa test2/sparse0100644000175000017500000000000012574542263017530 Sustar rawrdsnet1257460641412574542274040000000000000000000004000000000vbatts-tar-split-6881021/archive/tar/testdata/gnu-long-nul.tar000066400000000000000000000050001467537433400242120ustar00rootroot00000000000000././@LongLink0000644000000000000000000000024100000000000011600 Lustar rootroot01234567891234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890000644000175000017500000000000013044750217022125 0ustar rawrdsnetvbatts-tar-split-6881021/archive/tar/testdata/gnu-multi-hdrs.tar000066400000000000000000000110001467537433400245440ustar00rootroot00000000000000././@LongLink0000644000000000000000000000031600000000000011603 Lustar 
rootrootGNU1/GNU1/long-path-name././@LongLink0000644000000000000000000000031600000000000011603 Lustar rootrootGNU2/GNU2/long-path-name././@LongLink0000644000000000000000000000031600000000000011602 Kustar rootrootGNU3/GNU3/long-linkpath-name././@LongLink0000644000000000000000000000031600000000000011602 Kustar rootrootGNU4/GNU4/long-linkpath-namebar0000000000000000000000000000000000000000000007052 2fooustar vbatts-tar-split-6881021/archive/tar/testdata/gnu-nil-sparse-data.tar000066400000000000000000000050001467537433400254430ustar00rootroot00000000000000sparse.db0000000000000000000000000000175000000000000014113 Sustar 000000000000000000000000000000001750000000017500123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789vbatts-tar-split-6881021/archive/tar/testdata/gnu-nil-sparse-hole.tar000066400000000000000000000030001467537433400254570ustar00rootroot00000000000000sparse.db0000000000000000000000000000000000000000000014076 Sustar 
00000000000000000000017500000000000000000001750vbatts-tar-split-6881021/archive/tar/testdata/gnu-not-utf8.tar000066400000000000000000000030001467537433400241410ustar00rootroot00000000000000hibye0000644000175000017500000000000000000000000013150 0ustar rawrdsnet00000000000000vbatts-tar-split-6881021/archive/tar/testdata/gnu-sparse-big.tar000066400000000000000000000120001467537433400245110ustar00rootroot00000000000000gnu-sparse0000000000000000000000000000600000000000000022227 Sustar 00000000000000T 0000000100000000001000#00000001000 P/00000001000 GX ;r00000001000 GV00000001000012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789vbatts-tar-split-6881021/archive/tar/testdata/gnu-utf8.tar000066400000000000000000000050001467537433400233450ustar00rootroot00000000000000././@LongLink0000000000000000000000000000024300000000000007754 Lustar ☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹0000644000175000017500000000000000000000000056166 0ustar ☺⚹00000000000000vbatts-tar-split-6881021/archive/tar/testdata/gnu.tar000066400000000000000000000060001467537433400224620ustar00rootroot00000000000000small.txt0000640021650100116100000000000511213074064012105 0ustar dsymondsengKiltssmall2.txt0000640021650100116100000000001311213113114012154 0ustar dsymondsengGoogle.com 
vbatts-tar-split-6881021/archive/tar/testdata/hardlink.tar000066400000000000000000000050001467537433400234640ustar00rootroot00000000000000file.txt0000644000175000001440000000001712475625017013267 0ustar00vbattsusers00000000000000Slartibartfast hard.txt0000644000175000001440000000000012475625017014735 1file.txtustar00vbattsusers00000000000000vbatts-tar-split-6881021/archive/tar/testdata/hdr-only.tar000066400000000000000000000240001467537433400234250ustar00rootroot00000000000000dir/0000750116074500116100000000000012575654704010646 5ustar joetsaiengfifo0000640116074500116100000000000012575655016010730 6ustar joetsaiengfile0000640116074500116100000000005612575654723010735 0ustar joetsaiengThe quick brown fox jumped over the lazy dog! hardlink0000640116074500116100000000000012575654723012440 1fileustar joetsaiengnull0000666116074500116100000000000012575632775012237 3ustar joetsaieng00000010000003sda0000660116074500116100000000000012575632775012024 4ustar joetsaieng00000100000000symlink0000777116074500116100000000000012575654734012350 2fileustar joetsaiengbadlink0000777116074500116100000000000012575655374013021 2missingustar joetsaiengdir/0000750116074500116100000000000512575654704010653 5ustar joetsaiengfifo0000640116074500116100000000000512575655016010735 6ustar joetsaiengfile0000640116074500116100000000005612575654723010735 0ustar joetsaiengThe quick brown fox jumped over the lazy dog! 
hardlink0000640116074500116100000000000512575654723012445 1fileustar joetsaiengnull0000666116074500116100000000000512575632775012244 3ustar joetsaieng00000010000003sda0000660116074500116100000000000512575632775012031 4ustar joetsaieng00000100000000symlink0000777116074500116100000000000512575654734012355 2fileustar joetsaiengbadlink0000777116074500116100000000000512575655374013026 2missingustar joetsaiengvbatts-tar-split-6881021/archive/tar/testdata/invalid-go17.tar000066400000000000000000000030001467537433400240670ustar00rootroot00000000000000foo0000000 00000000000000000000000000000031564 ustar 00000000000000aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaavbatts-tar-split-6881021/archive/tar/testdata/issue10968.tar000066400000000000000000000010001467537433400234240ustar00rootroot0000000000000000-821950296ts|s00qwf000011s0100ts|ss0s|ssxSs10100ts|ss0s|ss0qS0t000q0001011s1000t00qfj.S100txS00t000qw010100ts|ssxS00t000qwf000011s10100ts|ss0s|sxSs1000ts|ss00s|sssx100t000q0001s100100t f04011100txS00t000qwf1411vbatts-tar-split-6881021/archive/tar/testdata/issue11169.tar000066400000000000000000000011321467537433400234240ustar00rootroot00000000000000./PaxHeaders.14463/aaa00006440000000000000000000000132125311453710114200xustar0030 00000=00000000000000000000030 00000=00000000000000000000030 00000=000000000000000000000vbatts-tar-split-6881021/archive/tar/testdata/issue12435.tar000066400000000000000000000010001467537433400234130ustar00rootroot00000000000000 00000000KǸvbatts-tar-split-6881021/archive/tar/testdata/neg-size.tar000066400000000000000000000010001467537433400234050ustar00rootroot0000000000000000-821950296ts|s00qwf000011s10100ts|ss0s|ssxSs10100ts|ss0s|ssqS0t000q0001011s1010t00qf115621 0100txS00t000qw010100ts|ssxS00t000qwf000011s10100ts|ss0s|ssxSs10100ts|ss00s|ss0xS00t000q0001s10100t 
f04011100txS00t000qwf000011sssxSs10100ts|ss311033624846128380s|ssxS00t000q00001011s10100t00qf04s|ss0s|ssxS00t000q00001011s10100t00x0f0vbatts-tar-split-6881021/archive/tar/testdata/nil-uid.tar000066400000000000000000000020001467537433400232260ustar00rootroot00000000000000P1050238.JPG.log00006640000000001612130627766012777 0ustar eyefieyefi121304042001213062776644,44,POWERONvbatts-tar-split-6881021/archive/tar/testdata/pax-bad-hdr-file.tar000066400000000000000000000050001467537433400246740ustar00rootroot00000000000000path/to/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/r0000000000000000000000000000004100000000000032025 xustar000000000000000033 path=PAX1/PAX1/long-path-namefoo0000640116074500116100000000125412575676024010640 0ustar00joetsaiengiRFmWghs3CK9/2HSvRja4TzX8HsRwzbVYl+h0HRkH9uPho2BGmrG5a0vpHsPn2W7Pn33Ux/+rkLSA3GUOX/WiPmP+h73T1r0DZIDJXtOgYWIUhsqUE0zUz1LEaO/y2H+WAe/ZlWt90N2KHka0bkXajoEAdOUrN42PKl/3mu7jiCW45hTNBDp3ArJD8QHN7l3JFMfnusPuir9+K8Oh6bEfN2bHhXjZ41ZkweCHZWUKT8NsdHeObQnXAyvkU5q1OhefE0+uvksVba2ZNyhThAAGZgiqEtTOJJLm8zgcI5avXHMVwlR6mt1jepOct4jQNlAdpkmslKW3BuiwLswGAsw7ttr/pRa/oCT4HUoBWcY3w96+TGR6uXtvbDOM9WhPXGo+1bwhAsA/RXPA1ZX+oS6t4rl/ZvkMZZN4VO5OvKph8tthdG3ocpXUw11zv6mQ7n6kyObLDCMFOtkdnhQBU/BGEK6mw4oTRa1Hd91+bUUqQh6hl3JeDk/t2KDWOEehOxgOqfVG72UuMeo2IayNK/pUXrcUXuywq9KT+bWQxdJsXzwkkyT8Ovz4oiIzHAa14e/Ib8Xxz+BHwpN3TtOXsHziuqLGMzqv867CganwsFxNEGRaTQ6C2bRK+OxetaxhQqe1G/UWwfi5a9PuJC3wfITSa0IhBot9hGAG35VVb4LsRE=vbatts-tar-split-6881021/archive/tar/testdata/pax-bad-mtime-file.tar000066400000000000000000000050001467537433400252320ustar00rootroot00000000000000path/to/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/r0000000000000000000000000000004100000000000032025 xustar000000000000000033 mtime=999xxx9324.432432444444 foo0000640116074500116100000000125412575676024010640 
0ustar00joetsaiengiRFmWghs3CK9/2HSvRja4TzX8HsRwzbVYl+h0HRkH9uPho2BGmrG5a0vpHsPn2W7Pn33Ux/+rkLSA3GUOX/WiPmP+h73T1r0DZIDJXtOgYWIUhsqUE0zUz1LEaO/y2H+WAe/ZlWt90N2KHka0bkXajoEAdOUrN42PKl/3mu7jiCW45hTNBDp3ArJD8QHN7l3JFMfnusPuir9+K8Oh6bEfN2bHhXjZ41ZkweCHZWUKT8NsdHeObQnXAyvkU5q1OhefE0+uvksVba2ZNyhThAAGZgiqEtTOJJLm8zgcI5avXHMVwlR6mt1jepOct4jQNlAdpkmslKW3BuiwLswGAsw7ttr/pRa/oCT4HUoBWcY3w96+TGR6uXtvbDOM9WhPXGo+1bwhAsA/RXPA1ZX+oS6t4rl/ZvkMZZN4VO5OvKph8tthdG3ocpXUw11zv6mQ7n6kyObLDCMFOtkdnhQBU/BGEK6mw4oTRa1Hd91+bUUqQh6hl3JeDk/t2KDWOEehOxgOqfVG72UuMeo2IayNK/pUXrcUXuywq9KT+bWQxdJsXzwkkyT8Ovz4oiIzHAa14e/Ib8Xxz+BHwpN3TtOXsHziuqLGMzqv867CganwsFxNEGRaTQ6C2bRK+OxetaxhQqe1G/UWwfi5a9PuJC3wfITSa0IhBot9hGAG35VVb4LsRE=vbatts-tar-split-6881021/archive/tar/testdata/pax-global-records.tar000066400000000000000000000160001467537433400253570ustar00rootroot00000000000000GlobalHead.0.00000000000000000000000000000004600000000000010217 gustar0022 mtime=1500000000.0 16 path=global1 file10000000000000000000000000000000000000000000010100 0ustar0000000000000000PaxHeaders.0/file20000000000000000000000000000001600000000000011142 xustar0014 path=file2 file20000000000000000000000000000000000000000000010101 0ustar0000000000000000GlobalHead.0.00000000000000000000000000000001000000000000010206 gustar008 path= file30000000000000000000000000000000000000000000010102 0ustar0000000000000000PaxHeaders.0/file40000000000000000000000000000002400000000000011143 xustar0020 mtime=1400000000 file40000000000000000000000000000000012334447000010137 0ustar0000000000000000vbatts-tar-split-6881021/archive/tar/testdata/pax-multi-hdrs.tar000066400000000000000000000110001467537433400245430ustar00rootroot00000000000000path/to/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/r0000000000000000000000000000004100000000000032025 xustar000000000000000033 path=PAX1/PAX1/long-path-name 
path/to/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/r0000000000000000000000000000004100000000000032025 xustar000000000000000033 path=PAX2/PAX2/long-path-name path/to/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/r0000000000000000000000000000005100000000000032026 xustar000000000000000041 linkpath=PAX3/PAX3/long-linkpath-name path/to/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/r0000000000000000000000000000005100000000000032026 xustar000000000000000041 linkpath=PAX4/PAX4/long-linkpath-name bar0000000000000000000000000000000000000000000007112 2fooustar00vbatts-tar-split-6881021/archive/tar/testdata/pax-nil-sparse-data.tar000066400000000000000000000100001467537433400254360ustar00rootroot00000000000000PaxHeaders.0/sparse.db0000000000000000000000000000016200000000000012024 xustar0022 GNU.sparse.major=1 22 GNU.sparse.minor=0 29 GNU.sparse.name=sparse.db 28 GNU.sparse.realsize=1000 13 size=1512 GNUSparseFile.0/sparse.db0000000000000000000000000000275000000000000013544 0ustar00000000000000001 0 1000 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789vbatts-tar-split-6881021/archive/tar/testdata/pax-nil-sparse-hole.tar000066400000000000000000000060001467537433400254610ustar00rootroot00000000000000PaxHeaders.0/sparse.db0000000000000000000000000000016100000000000012023 xustar0022 GNU.sparse.major=1 22 GNU.sparse.minor=0 29 GNU.sparse.name=sparse.db 28 GNU.sparse.realsize=1000 12 size=512 GNUSparseFile.0/sparse.db0000000000000000000000000000100000000000000013527 0ustar00000000000000001 1000 0 vbatts-tar-split-6881021/archive/tar/testdata/pax-nul-path.tar000066400000000000000000000050001467537433400242060ustar00rootroot00000000000000PaxHeaders.0/0123456789012345678901234567890123456789012345678901234567890123456789012345678901234560000000000000000000000000000032300000000000022376 xustar0000000000000000211 path=01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890000000000000000000000000000000000000000000021361 0ustar0000000000000000vbatts-tar-split-6881021/archive/tar/testdata/pax-nul-xattrs.tar000066400000000000000000000050001467537433400245770ustar00rootroot00000000000000PaxHeaders.0/bad-null.txt0000000000000000000000000000003700000000000013720 xustar000000000000000031 SCHILY.xattr.null=fizzbuzz bad-null.txt0000000000000000000000000000000000000000000011414 0ustar0000000000000000vbatts-tar-split-6881021/archive/tar/testdata/pax-path-hdr.tar000066400000000000000000000020001467537433400241620ustar00rootroot00000000000000path/to/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/r0000000000000000000000000000004100000000000032025 xustar000000000000000033 path=PAX1/PAX1/long-path-name vbatts-tar-split-6881021/archive/tar/testdata/pax-pos-size-file.tar000066400000000000000000000050001467537433400251440ustar00rootroot00000000000000path/to/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/readme/r0000000000000000000000000000004100000000000032025 xustar000000000000000033 size=000000000000000000000999 foo0000640116074500116100000000125412575676024010640 
0ustar00joetsaiengiRFmWghs3CK9/2HSvRja4TzX8HsRwzbVYl+h0HRkH9uPho2BGmrG5a0vpHsPn2W7Pn33Ux/+rkLSA3GUOX/WiPmP+h73T1r0DZIDJXtOgYWIUhsqUE0zUz1LEaO/y2H+WAe/ZlWt90N2KHka0bkXajoEAdOUrN42PKl/3mu7jiCW45hTNBDp3ArJD8QHN7l3JFMfnusPuir9+K8Oh6bEfN2bHhXjZ41ZkweCHZWUKT8NsdHeObQnXAyvkU5q1OhefE0+uvksVba2ZNyhThAAGZgiqEtTOJJLm8zgcI5avXHMVwlR6mt1jepOct4jQNlAdpkmslKW3BuiwLswGAsw7ttr/pRa/oCT4HUoBWcY3w96+TGR6uXtvbDOM9WhPXGo+1bwhAsA/RXPA1ZX+oS6t4rl/ZvkMZZN4VO5OvKph8tthdG3ocpXUw11zv6mQ7n6kyObLDCMFOtkdnhQBU/BGEK6mw4oTRa1Hd91+bUUqQh6hl3JeDk/t2KDWOEehOxgOqfVG72UuMeo2IayNK/pUXrcUXuywq9KT+bWQxdJsXzwkkyT8Ovz4oiIzHAa14e/Ib8Xxz+BHwpN3TtOXsHziuqLGMzqv867CganwsFxNEGRaTQ6C2bRK+OxetaxhQqe1G/UWwfi5a9PuJC3wfITSa0IhBot9hGAG35VVb4LsRE=vbatts-tar-split-6881021/archive/tar/testdata/pax-records.tar000066400000000000000000000050001467537433400241170ustar00rootroot00000000000000PaxHeaders.0/file0000000000000000000000000000013500000000000011062 xustar0018 GOLANG.pkg=tar 25 comment=Hello, 世界 50 uname=longlonglonglonglonglonglonglonglonglong file0000000000000000000000000000000000000000000016617 0ustar00longlonglonglonglonglonglonglong00000000000000vbatts-tar-split-6881021/archive/tar/testdata/pax-sparse-big.tar000066400000000000000000000140001467537433400245120ustar00rootroot00000000000000PaxHeaders.0/pax-sparse0000000000000000000000000000017200000000000012227 xustar0022 GNU.sparse.major=1 22 GNU.sparse.minor=0 30 GNU.sparse.name=pax-sparse 35 GNU.sparse.realsize=60000000000 13 size=3584 GNUSparseFile.0/pax-sparse0000000000000000000000000000700000000000000013737 0ustar00000000000000006 9999999488 512 19999999488 512 29999999488 512 39999999488 512 49999999488 512 59999999488 512 
012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789vbatts-tar-split-6881021/archive/tar/testdata/pax.tar000066400000000000000000000240001467537433400224610ustar00rootroot00000000000000a/PaxHeaders.6887/12345678910111213141516171819202122232425262728293031323334353637383940414243444540000644000175000017500000000044612036615200022461 xustar0000000000000000204 path=a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 30 mtime=1350244992.023960108 30 atime=1350244992.023960108 30 ctime=1350244992.023960108 a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525350000664000175000017500000000000712036615200023454 0ustar00shaneshane00000000000000shaner a/PaxHeaders.6887/b0000644000175000017500000000045012036666720012440 xustar0000000000000000206 linkpath=123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 30 mtime=1350266320.910238425 30 atime=1350266320.910238425 30 ctime=1350266320.910238425 a/b0000777000175000017500000000000012036666720024004 
21234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545ustar00shaneshane00000000000000vbatts-tar-split-6881021/archive/tar/testdata/small.txt000066400000000000000000000000051467537433400230310ustar00rootroot00000000000000Kiltsvbatts-tar-split-6881021/archive/tar/testdata/small2.txt000066400000000000000000000000131467537433400231120ustar00rootroot00000000000000Google.com vbatts-tar-split-6881021/archive/tar/testdata/sparse-formats.tar000066400000000000000000000430001467537433400246400ustar00rootroot00000000000000sparse-gnu0000644000175000017500000000013712277442734023356 Sustar daviddavid0000000000100000000001000000000030000000000100000000005000000000010000000000700000000001000000003100000000001100000000001000000000130000000000100000000015000000000010000000001700000000001000000000210000000000100000000023000000000010000000002500000000001000000000270000000000100000000031000000000010000000003300000000001000000000350000000000100000000037000000000010000000004100000000001000000000430000000000100000000045000000000010000000004700000000001000000000510000000000100000000053000000000010000000005500000000001000000000570000000000100000000061000000000010000000006300000000001000000000650000000000100000000067000000000010000000007100000000001000000000730000000000100000000075000000000010000000007700000000001000000001010000000000100000000103000000000010000000010500000000001000000001070000000000100000000111000000000010000000011300000000001000000001150000000000100000000117000000000010000000012100000000001000000001230000000000100000000125000000000010000000012700000000001000000001310000000000100000000133000000000010000000013500000000001000000001370000000000100000000141000000000010000000014300000000001000000001450000000000100000000147000000000010000000015100000000001000000001530000000000100000000155000000000010000000015700000000001000000001610000000000100000000163000000000010000000016500000000001000000001670000000000100000000171000000000010000000
017300000000001000000001750000000000100000000177000000000010000000020100000000001000000002030000000000100000000205000000000010000000020700000000001000000002110000000000100000000213000000000010000000021500000000001000000002170000000000100000000221000000000010000000022300000000001000000002250000000000100000000227000000000010000000023100000000001000000002330000000000100000000235000000000010000000023700000000001000000002410000000000100000000243000000000010000000024500000000001000000002470000000000100000000251000000000010000000025300000000001000000002550000000000100000000257000000000010000000026100000000001000000002630000000000100000000265000000000010000000026700000000001000000002710000000000100000000273000000000010000000027500000000001GoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGo!./PaxHeaders.1234/sparse-posix-0.00000644000175000017500000001121112277274257015625 xustar00daviddavid23 GNU.sparse.size=200 27 GNU.sparse.numblocks=95 23 GNU.sparse.offset=1 25 GNU.sparse.numbytes=1 23 GNU.sparse.offset=3 25 GNU.sparse.numbytes=1 23 GNU.sparse.offset=5 25 GNU.sparse.numbytes=1 23 GNU.sparse.offset=7 25 GNU.sparse.numbytes=1 23 GNU.sparse.offset=9 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=11 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=13 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=15 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=17 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=19 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=21 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=23 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=25 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=27 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=29 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=31 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=33 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=35 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=37 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=39 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=41 25 
GNU.sparse.numbytes=1 24 GNU.sparse.offset=43 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=45 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=47 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=49 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=51 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=53 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=55 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=57 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=59 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=61 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=63 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=65 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=67 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=69 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=71 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=73 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=75 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=77 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=79 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=81 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=83 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=85 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=87 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=89 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=91 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=93 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=95 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=97 25 GNU.sparse.numbytes=1 24 GNU.sparse.offset=99 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=101 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=103 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=105 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=107 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=109 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=111 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=113 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=115 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=117 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=119 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=121 25 GNU.sparse.numbytes=1 25 
GNU.sparse.offset=123 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=125 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=127 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=129 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=131 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=133 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=135 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=137 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=139 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=141 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=143 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=145 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=147 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=149 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=151 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=153 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=155 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=157 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=159 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=161 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=163 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=165 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=167 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=169 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=171 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=173 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=175 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=177 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=179 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=181 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=183 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=185 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=187 25 GNU.sparse.numbytes=1 25 GNU.sparse.offset=189 25 GNU.sparse.numbytes=1 sparse-posix-0.00000644000175000017500000000013712277272253012743 0ustar00daviddavidGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGo!./PaxHeaders.1234/sparse-posix-0.10000644000175000017500000000115412277271266015630 xustar00daviddavid23 
GNU.sparse.size=200 27 GNU.sparse.numblocks=95 36 GNU.sparse.name=sparse-posix-0.1 534 GNU.sparse.map=1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1 ./GNUSparseFile.1234/sparse-posix-0.10000644000175000017500000000013712277266750016105 0ustar00daviddavidGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGo!./PaxHeaders.1234/sparse-posix-1.00000644000175000017500000000015312277261432015620 xustar00daviddavid22 GNU.sparse.major=1 22 GNU.sparse.minor=0 36 GNU.sparse.name=sparse-posix-1.0 27 GNU.sparse.realsize=200 ./GNUSparseFile.1234/sparse-posix-1.00000644000175000017500000000213712277260774016107 0ustar00daviddavid95 1 1 3 1 5 1 7 1 9 1 11 1 13 1 15 1 17 1 19 1 21 1 23 1 25 1 27 1 29 1 31 1 33 1 35 1 37 1 39 1 41 1 43 1 45 1 47 1 49 1 51 1 53 1 55 1 57 1 59 1 61 1 63 1 65 1 67 1 69 1 71 1 73 1 75 1 77 1 79 1 81 1 83 1 85 1 87 1 89 1 91 1 93 1 95 1 97 1 99 1 101 1 103 1 105 1 107 1 109 1 111 1 113 1 115 1 117 1 119 1 121 1 123 1 125 1 127 1 129 1 131 1 133 1 135 1 137 1 139 1 141 1 143 1 145 1 147 1 149 1 151 1 153 1 155 1 157 1 159 1 161 1 163 1 165 1 167 1 169 1 171 1 173 1 175 1 177 1 179 1 181 1 183 1 185 1 187 1 189 1 GoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGoGo!end0000644000175000017500000000000412277447757010527 0ustar daviddavidend vbatts-tar-split-6881021/archive/tar/testdata/star.tar000066400000000000000000000060001467537433400226420ustar00rootroot00000000000000small.txt0000640 0216501 0011610 00000000005 
11213575217 0016730 0ustar00dsymondseng0000000 0000000 11213575217 11213575217 tarKiltssmall2.txt0000640 0216501 0011610 00000000013 11213575217 0017011 0ustar00dsymondseng0000000 0000000 11213575217 11213575217 tarGoogle.com vbatts-tar-split-6881021/archive/tar/testdata/trailing-slash.tar000066400000000000000000000050001467537433400246110ustar00rootroot00000000000000123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/1234567890000000000000000000000000000046600000000000020160 xustar00310 path=123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/ 123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/123456789/1234567890000000000000000000000000000000000000000000021275 5ustar0000000000000000vbatts-tar-split-6881021/archive/tar/testdata/ustar-file-devs.tar000066400000000000000000000030001467537433400247000ustar00rootroot00000000000000file0000644000000000000000000000000000000000000010037 0ustar0000000010000001vbatts-tar-split-6881021/archive/tar/testdata/ustar-file-reg.tar000066400000000000000000000030001467537433400245140ustar00rootroot00000000000000foo0000640116074500116100000000125412575676024010640 
0ustar00joetsaiengiRFmWghs3CK9/2HSvRja4TzX8HsRwzbVYl+h0HRkH9uPho2BGmrG5a0vpHsPn2W7Pn33Ux/+rkLSA3GUOX/WiPmP+h73T1r0DZIDJXtOgYWIUhsqUE0zUz1LEaO/y2H+WAe/ZlWt90N2KHka0bkXajoEAdOUrN42PKl/3mu7jiCW45hTNBDp3ArJD8QHN7l3JFMfnusPuir9+K8Oh6bEfN2bHhXjZ41ZkweCHZWUKT8NsdHeObQnXAyvkU5q1OhefE0+uvksVba2ZNyhThAAGZgiqEtTOJJLm8zgcI5avXHMVwlR6mt1jepOct4jQNlAdpkmslKW3BuiwLswGAsw7ttr/pRa/oCT4HUoBWcY3w96+TGR6uXtvbDOM9WhPXGo+1bwhAsA/RXPA1ZX+oS6t4rl/ZvkMZZN4VO5OvKph8tthdG3ocpXUw11zv6mQ7n6kyObLDCMFOtkdnhQBU/BGEK6mw4oTRa1Hd91+bUUqQh6hl3JeDk/t2KDWOEehOxgOqfVG72UuMeo2IayNK/pUXrcUXuywq9KT+bWQxdJsXzwkkyT8Ovz4oiIzHAa14e/Ib8Xxz+BHwpN3TtOXsHziuqLGMzqv867CganwsFxNEGRaTQ6C2bRK+OxetaxhQqe1G/UWwfi5a9PuJC3wfITSa0IhBot9hGAG35VVb4LsRE=vbatts-tar-split-6881021/archive/tar/testdata/ustar.tar000066400000000000000000000040001467537433400230250ustar00rootroot00000000000000file.txt0000644000076500000240000000000612104402656045134 0ustar00shanestaff00000000000000longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longnamehello vbatts-tar-split-6881021/archive/tar/testdata/v7.tar000066400000000000000000000070001467537433400222260ustar00rootroot00000000000000small.txt 444 216501 11610 5 11213575720 6062 Kilts=$AlD"鯗!"G B 鯗B HAl L㏗ ㏑41uᏔhv㏔H  H 4Hhh hQnn㏔ zB+L/Zthread_helpers@``small2.txt 444 216501 11610 13 11213575720 6163 Google.com =$Al=?lD"鯗!"G B 鯗B HAl D"C!"Q L CL ?l | C41uᏥ ㏑㏔H 41uᏔv㏊Xĥ  hx 4Xĥh㏘ hQnn㏊ /)+LH/m(mvbatts-tar-split-6881021/archive/tar/testdata/writer-big-long.tar000066400000000000000000000030001467537433400246760ustar00rootroot00000000000000longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/l0000000000000000000000000000025600000000000031420 xustar00154 path=longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/16gig.txt 20 size=17179869184 
longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/longname/l0000644000175000017500000000000012332770507036462 0ustar00guillaumeguillaume00000000000000vbatts-tar-split-6881021/archive/tar/testdata/writer-big.tar000066400000000000000000000010001467537433400237370ustar00rootroot00000000000000tmp/16gig.txt00006400216501001161011262231050013123 0ustar dsymondseng00000000000000vbatts-tar-split-6881021/archive/tar/testdata/writer.tar000066400000000000000000000070001467537433400232060ustar00rootroot00000000000000small.txt0000640021650100116100000000000511223032352013400 0ustar00dsymondseng00000000000000Kiltssmall2.txt0000640021650100116100000000001311216101324013457 0ustar00dsymondseng00000000000000Google.com link.txt0000777000175000017500000000000011626640112015665 2small.txtustar00stringsstrings00000000000000vbatts-tar-split-6881021/archive/tar/testdata/xattrs.tar000066400000000000000000000120001467537433400232130ustar00rootroot00000000000000./PaxHeaders.29205/small.txt0000644000000000000000000000033512247327552014100 xustar000000000000000029 mtime=1386065770.44825232 29 atime=1389782991.41987522 30 ctime=1389782956.794414986 31 SCHILY.xattr.user.key=value 33 SCHILY.xattr.user.key2=value2 69 SCHILY.xattr.security.selinux=unconfined_u:object_r:default_t:s0 small.txt0000644000175000000120000000000512247327552013040 0ustar00alexwheel00000000000000Kilts./PaxHeaders.29205/small2.txt0000644000000000000000000000023612247327552014162 xustar000000000000000030 mtime=1386065770.449252304 29 atime=1389782991.41987522 30 ctime=1386065770.449252304 69 SCHILY.xattr.security.selinux=unconfined_u:object_r:default_t:s0 small2.txt0000644000175000000120000000001312247327552013121 0ustar00alexwheel00000000000000Google.com vbatts-tar-split-6881021/archive/tar/writer.go000066400000000000000000000446521467537433400212320ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package tar import ( "fmt" "io" "path" "sort" "strings" "time" ) // Writer provides sequential writing of a tar archive. // Write.WriteHeader begins a new file with the provided Header, // and then Writer can be treated as an io.Writer to supply that file's data. type Writer struct { w io.Writer pad int64 // Amount of padding to write after current file entry curr fileWriter // Writer for current file entry hdr Header // Shallow copy of Header that is safe for mutations blk block // Buffer to use as temporary local storage // err is a persistent error. // It is only the responsibility of every exported method of Writer to // ensure that this error is sticky. err error } // NewWriter creates a new Writer writing to w. func NewWriter(w io.Writer) *Writer { return &Writer{w: w, curr: ®FileWriter{w, 0}} } type fileWriter interface { io.Writer fileState ReadFrom(io.Reader) (int64, error) } // Flush finishes writing the current file's block padding. // The current file must be fully written before Flush can be called. // // This is unnecessary as the next call to WriteHeader or Close // will implicitly flush out the file's padding. func (tw *Writer) Flush() error { if tw.err != nil { return tw.err } if nb := tw.curr.LogicalRemaining(); nb > 0 { return fmt.Errorf("archive/tar: missed writing %d bytes", nb) } if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { return tw.err } tw.pad = 0 return nil } // WriteHeader writes hdr and prepares to accept the file's contents. // The Header.Size determines how many bytes can be written for the next file. // If the current file is not fully written, then this returns an error. // This implicitly flushes any padding necessary before writing the header. 
func (tw *Writer) WriteHeader(hdr *Header) error {
	if err := tw.Flush(); err != nil {
		return err
	}
	tw.hdr = *hdr // Shallow copy of Header

	// Avoid usage of the legacy TypeRegA flag, and automatically promote
	// it to use TypeReg or TypeDir.
	if tw.hdr.Typeflag == TypeRegA {
		if strings.HasSuffix(tw.hdr.Name, "/") {
			tw.hdr.Typeflag = TypeDir
		} else {
			tw.hdr.Typeflag = TypeReg
		}
	}

	// Round ModTime and ignore AccessTime and ChangeTime unless
	// the format is explicitly chosen.
	// This ensures nominal usage of WriteHeader (without specifying the format)
	// does not always result in the PAX format being chosen, which
	// causes a 1KiB increase to every header.
	if tw.hdr.Format == FormatUnknown {
		tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second)
		tw.hdr.AccessTime = time.Time{}
		tw.hdr.ChangeTime = time.Time{}
	}

	// Prefer the most portable format the header permits: USTAR, then PAX,
	// then GNU. Errors from the chosen writer are made sticky via tw.err.
	allowedFormats, paxHdrs, err := tw.hdr.allowedFormats()
	switch {
	case allowedFormats.has(FormatUSTAR):
		tw.err = tw.writeUSTARHeader(&tw.hdr)
		return tw.err
	case allowedFormats.has(FormatPAX):
		tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs)
		return tw.err
	case allowedFormats.has(FormatGNU):
		tw.err = tw.writeGNUHeader(&tw.hdr)
		return tw.err
	default:
		return err // Non-fatal error
	}
}

// writeUSTARHeader encodes hdr as a single USTAR header block,
// splitting long names into the prefix/suffix fields when possible.
func (tw *Writer) writeUSTARHeader(hdr *Header) error {
	// Check if we can use USTAR prefix/suffix splitting.
	var namePrefix string
	if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok {
		namePrefix, hdr.Name = prefix, suffix
	}

	// Pack the main header.
	var f formatter
	blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal)
	f.formatString(blk.USTAR().Prefix(), namePrefix)
	blk.SetFormat(FormatUSTAR)
	if f.err != nil {
		return f.err // Should never happen since header is validated
	}
	return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag)
}

// writePAXHeader encodes hdr in PAX format: an optional extended-header
// entry carrying the sorted paxHdrs records, followed by the main header.
// For TypeXGlobalHeader entries, only the global extended header is emitted.
func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
	realName, realSize := hdr.Name, hdr.Size

	// TODO(dsnet): Re-enable this when adding sparse support.
	// See https://golang.org/issue/22735
	/*
		// Handle sparse files.
		var spd sparseDatas
		var spb []byte
		if len(hdr.SparseHoles) > 0 {
			sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map
			sph = alignSparseEntries(sph, hdr.Size)
			spd = invertSparseEntries(sph, hdr.Size)

			// Format the sparse map.
			hdr.Size = 0 // Replace with encoded size
			spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n')
			for _, s := range spd {
				hdr.Size += s.Length
				spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n')
				spb = append(strconv.AppendInt(spb, s.Length, 10), '\n')
			}
			pad := blockPadding(int64(len(spb)))
			spb = append(spb, zeroBlock[:pad]...)
			hdr.Size += int64(len(spb)) // Accounts for encoded sparse map

			// Add and modify appropriate PAX records.
			dir, file := path.Split(realName)
			hdr.Name = path.Join(dir, "GNUSparseFile.0", file)
			paxHdrs[paxGNUSparseMajor] = "1"
			paxHdrs[paxGNUSparseMinor] = "0"
			paxHdrs[paxGNUSparseName] = realName
			paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10)
			paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10)
			delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName
		}
	*/
	_ = realSize

	// Write PAX records to the output.
	isGlobal := hdr.Typeflag == TypeXGlobalHeader
	if len(paxHdrs) > 0 || isGlobal {
		// Sort keys for deterministic ordering.
		var keys []string
		for k := range paxHdrs {
			keys = append(keys, k)
		}
		sort.Strings(keys)

		// Write each record to a buffer.
		var buf strings.Builder
		for _, k := range keys {
			rec, err := formatPAXRecord(k, paxHdrs[k])
			if err != nil {
				return err
			}
			buf.WriteString(rec)
		}

		// Write the extended header file.
		var name string
		var flag byte
		if isGlobal {
			name = realName
			if name == "" {
				name = "GlobalHead.0.0"
			}
			flag = TypeXGlobalHeader
		} else {
			dir, file := path.Split(realName)
			name = path.Join(dir, "PaxHeaders.0", file)
			flag = TypeXHeader
		}
		data := buf.String()
		if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal {
			return err // Global headers return here
		}
	}

	// Pack the main header.
	var f formatter // Ignore errors since they are expected
	fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) }
	blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal)
	blk.SetFormat(FormatPAX)
	if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
		return err
	}

	// TODO(dsnet): Re-enable this when adding sparse support.
	// See https://golang.org/issue/22735
	/*
		// Write the sparse map and setup the sparse writer if necessary.
		if len(spd) > 0 {
			// Use tw.curr since the sparse map is accounted for in hdr.Size.
			if _, err := tw.curr.Write(spb); err != nil {
				return err
			}
			tw.curr = &sparseFileWriter{tw.curr, spd, 0}
		}
	*/
	return nil
}

// writeGNUHeader encodes hdr in GNU format, emitting ././@LongLink
// pseudo-entries first when Name or Linkname overflow the fixed fields.
func (tw *Writer) writeGNUHeader(hdr *Header) error {
	// Use long-link files if Name or Linkname exceeds the field size.
	const longName = "././@LongLink"
	if len(hdr.Name) > nameSize {
		data := hdr.Name + "\x00"
		if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil {
			return err
		}
	}
	if len(hdr.Linkname) > nameSize {
		data := hdr.Linkname + "\x00"
		if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil {
			return err
		}
	}

	// Pack the main header.
	var f formatter // Ignore errors since they are expected
	var spd sparseDatas
	var spb []byte
	blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric)
	if !hdr.AccessTime.IsZero() {
		f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix())
	}
	if !hdr.ChangeTime.IsZero() {
		f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix())
	}
	// TODO(dsnet): Re-enable this when adding sparse support.
	// See https://golang.org/issue/22735
	/*
		if hdr.Typeflag == TypeGNUSparse {
			sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map
			sph = alignSparseEntries(sph, hdr.Size)
			spd = invertSparseEntries(sph, hdr.Size)

			// Format the sparse map.
			formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas {
				for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ {
					f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset)
					f.formatNumeric(sa.Entry(i).Length(), sp[0].Length)
					sp = sp[1:]
				}
				if len(sp) > 0 {
					sa.IsExtended()[0] = 1
				}
				return sp
			}
			sp2 := formatSPD(spd, blk.GNU().Sparse())
			for len(sp2) > 0 {
				var spHdr block
				sp2 = formatSPD(sp2, spHdr.Sparse())
				spb = append(spb, spHdr[:]...)
			}

			// Update size fields in the header block.
			realSize := hdr.Size
			hdr.Size = 0 // Encoded size; does not account for encoded sparse map
			for _, s := range spd {
				hdr.Size += s.Length
			}
			copy(blk.V7().Size(), zeroBlock[:]) // Reset field
			f.formatNumeric(blk.V7().Size(), hdr.Size)
			f.formatNumeric(blk.GNU().RealSize(), realSize)
		}
	*/
	blk.SetFormat(FormatGNU)
	if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
		return err
	}

	// Write the extended sparse map and setup the sparse writer if necessary.
	if len(spd) > 0 {
		// Use tw.w since the sparse map is not accounted for in hdr.Size.
		if _, err := tw.w.Write(spb); err != nil {
			return err
		}
		tw.curr = &sparseFileWriter{tw.curr, spd, 0}
	}
	return nil
}

type (
	stringFormatter func([]byte, string)
	numberFormatter func([]byte, int64)
)

// templateV7Plus fills out the V7 fields of a block using values from hdr.
// It also fills out fields (uname, gname, devmajor, devminor) that are
// shared in the USTAR, PAX, and GNU formats using the provided formatters.
//
// The block returned is only valid until the next call to
// templateV7Plus or writeRawFile.
func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block {
	tw.blk.Reset()

	// A zero ModTime is encoded as the Unix epoch rather than a
	// far-out-of-range negative value.
	modTime := hdr.ModTime
	if modTime.IsZero() {
		modTime = time.Unix(0, 0)
	}

	v7 := tw.blk.V7()
	v7.TypeFlag()[0] = hdr.Typeflag
	fmtStr(v7.Name(), hdr.Name)
	fmtStr(v7.LinkName(), hdr.Linkname)
	fmtNum(v7.Mode(), hdr.Mode)
	fmtNum(v7.UID(), int64(hdr.Uid))
	fmtNum(v7.GID(), int64(hdr.Gid))
	fmtNum(v7.Size(), hdr.Size)
	fmtNum(v7.ModTime(), modTime.Unix())

	ustar := tw.blk.USTAR()
	fmtStr(ustar.UserName(), hdr.Uname)
	fmtStr(ustar.GroupName(), hdr.Gname)
	fmtNum(ustar.DevMajor(), hdr.Devmajor)
	fmtNum(ustar.DevMinor(), hdr.Devminor)

	return &tw.blk
}

// writeRawFile writes a minimal file with the given name and flag type.
// It uses format to encode the header format and will write data as the body.
// It uses default values for all of the other fields (as BSD and GNU tar does).
func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error {
	tw.blk.Reset()

	// Best effort for the filename.
	name = toASCII(name)
	if len(name) > nameSize {
		name = name[:nameSize]
	}
	name = strings.TrimRight(name, "/")

	var f formatter
	v7 := tw.blk.V7()
	v7.TypeFlag()[0] = flag
	f.formatString(v7.Name(), name)
	f.formatOctal(v7.Mode(), 0)
	f.formatOctal(v7.UID(), 0)
	f.formatOctal(v7.GID(), 0)
	f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB
	f.formatOctal(v7.ModTime(), 0)
	tw.blk.SetFormat(format)
	if f.err != nil {
		return f.err // Only occurs if size condition is violated
	}

	// Write the header and data.
	if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil {
		return err
	}
	_, err := io.WriteString(tw, data)
	return err
}

// writeRawHeader writes the value of blk, regardless of its value.
// It sets up the Writer such that it can accept a file of the given size.
// If the flag is a special header-only flag, then the size is treated as zero.
func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error {
	if err := tw.Flush(); err != nil {
		return err
	}
	if _, err := tw.w.Write(blk[:]); err != nil {
		return err
	}
	if isHeaderOnlyType(flag) {
		size = 0
	}
	tw.curr = &regFileWriter{tw.w, size}
	tw.pad = blockPadding(size)
	return nil
}

// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
// If the path is not splittable, then it will return ("", "", false).
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
	length := len(name)
	if length <= nameSize || !isASCII(name) {
		return "", "", false
	} else if length > prefixSize+1 {
		// Leave room for the '/' that is dropped between prefix and suffix.
		length = prefixSize + 1
	} else if name[length-1] == '/' {
		length--
	}

	// Split on the last '/' that keeps both halves within their field limits.
	i := strings.LastIndex(name[:length], "/")
	nlen := len(name) - i - 1 // nlen is length of suffix
	plen := i                 // plen is length of prefix
	if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
		return "", "", false
	}
	return name[:i], name[i+1:], true
}

// Write writes to the current file in the tar archive.
// Write returns the error ErrWriteTooLong if more than
// Header.Size bytes are written after WriteHeader.
//
// Calling Write on special types like TypeLink, TypeSymlink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless
// of what the Header.Size claims.
func (tw *Writer) Write(b []byte) (int, error) {
	if tw.err != nil {
		return 0, tw.err
	}
	n, err := tw.curr.Write(b)
	// ErrWriteTooLong is recoverable (callers may retry with a new header),
	// so it is deliberately not made sticky.
	if err != nil && err != ErrWriteTooLong {
		tw.err = err
	}
	return n, err
}

// readFrom populates the content of the current file by reading from r.
// The bytes read must match the number of remaining bytes in the current file.
//
// If the current file is sparse and r is an io.ReadSeeker,
// then readFrom uses Seek to skip past holes defined in Header.SparseHoles,
// assuming that skipped regions are all NULs.
// This always reads the last byte to ensure r is the right size.
//
// TODO(dsnet): Re-export this when adding sparse file support.
// See https://golang.org/issue/22735
func (tw *Writer) readFrom(r io.Reader) (int64, error) {
	if tw.err != nil {
		return 0, tw.err
	}
	n, err := tw.curr.ReadFrom(r)
	// ErrWriteTooLong is recoverable, so it is deliberately not made sticky.
	if err != nil && err != ErrWriteTooLong {
		tw.err = err
	}
	return n, err
}

// Close closes the tar archive by flushing the padding, and writing the footer.
// If the current file (from a prior call to WriteHeader) is not fully written,
// then this returns an error.
func (tw *Writer) Close() error {
	// Calling Close more than once is a no-op after the first success.
	if tw.err == ErrWriteAfterClose {
		return nil
	}
	if tw.err != nil {
		return tw.err
	}

	// Trailer: two zero blocks.
	err := tw.Flush()
	for i := 0; i < 2 && err == nil; i++ {
		_, err = tw.w.Write(zeroBlock[:])
	}

	// Ensure all future actions are invalid.
	tw.err = ErrWriteAfterClose
	return err // Report IO errors
}

// regFileWriter is a fileWriter for writing data to a regular file entry.
type regFileWriter struct {
	w  io.Writer // Underlying Writer
	nb int64     // Number of remaining bytes to write
}

func (fw *regFileWriter) Write(b []byte) (n int, err error) {
	// Truncate the input to the declared size; report ErrWriteTooLong
	// only after all permissible bytes have been written.
	overwrite := int64(len(b)) > fw.nb
	if overwrite {
		b = b[:fw.nb]
	}
	if len(b) > 0 {
		n, err = fw.w.Write(b)
		fw.nb -= int64(n)
	}
	switch {
	case err != nil:
		return n, err
	case overwrite:
		return n, ErrWriteTooLong
	default:
		return n, nil
	}
}

func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) {
	// Wrap fw in an anonymous struct so io.Copy cannot detect a ReadFrom
	// method and recurse into this function.
	return io.Copy(struct{ io.Writer }{fw}, r)
}

func (fw regFileWriter) LogicalRemaining() int64 {
	return fw.nb
}
func (fw regFileWriter) PhysicalRemaining() int64 {
	return fw.nb
}

// sparseFileWriter is a fileWriter for writing data to a sparse file entry.
type sparseFileWriter struct {
	fw  fileWriter  // Underlying fileWriter
	sp  sparseDatas // Normalized list of data fragments
	pos int64       // Current position in sparse file
}

func (sw *sparseFileWriter) Write(b []byte) (n int, err error) {
	overwrite := int64(len(b)) > sw.LogicalRemaining()
	if overwrite {
		b = b[:sw.LogicalRemaining()]
	}

	b0 := b
	endPos := sw.pos + int64(len(b))
	for endPos > sw.pos && err == nil {
		var nf int // Bytes written in fragment
		dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
		if sw.pos < dataStart { // In a hole fragment
			bf := b[:min(int64(len(b)), dataStart-sw.pos)]
			nf, err = zeroWriter{}.Write(bf)
		} else { // In a data fragment
			bf := b[:min(int64(len(b)), dataEnd-sw.pos)]
			nf, err = sw.fw.Write(bf)
		}
		b = b[nf:]
		sw.pos += int64(nf)
		if sw.pos >= dataEnd && len(sw.sp) > 1 {
			sw.sp = sw.sp[1:] // Ensure last fragment always remains
		}
	}

	n = len(b0) - len(b)
	switch {
	case err == ErrWriteTooLong:
		return n, errMissData // Not possible; implies bug in validation logic
	case err != nil:
		return n, err
	case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0:
		return n, errUnrefData // Not possible; implies bug in validation logic
	case overwrite:
		return n, ErrWriteTooLong
	default:
		return n, nil
	}
}

func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) {
	rs, ok := r.(io.ReadSeeker)
	if ok {
		// Probe with a no-op seek: not every io.Seeker can really seek.
		if _, err := rs.Seek(0, io.SeekCurrent); err != nil {
			ok = false // Not all io.Seeker can really seek
		}
	}
	if !ok {
		// Fall back to a plain copy through Write, which fills holes with
		// explicit NUL checks via zeroWriter.
		return io.Copy(struct{ io.Writer }{sw}, r)
	}

	var readLastByte bool
	pos0 := sw.pos
	for sw.LogicalRemaining() > 0 && !readLastByte && err == nil {
		var nf int64 // Size of fragment
		dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
		if sw.pos < dataStart { // In a hole fragment
			nf = dataStart - sw.pos
			if sw.PhysicalRemaining() == 0 {
				readLastByte = true
				nf--
			}
			_, err = rs.Seek(nf, io.SeekCurrent)
		} else { // In a data fragment
			nf = dataEnd - sw.pos
			nf, err = io.CopyN(sw.fw, rs, nf)
		}
		sw.pos += nf
		if sw.pos >= dataEnd && len(sw.sp) > 1 {
			sw.sp = sw.sp[1:] // Ensure last fragment always remains
		}
	}

	// If the last fragment is a hole, then seek to 1-byte before EOF, and
	// read a single byte to ensure the file is the right size.
	if readLastByte && err == nil {
		_, err = mustReadFull(rs, []byte{0})
		sw.pos++
	}

	n = sw.pos - pos0
	switch {
	case err == io.EOF:
		return n, io.ErrUnexpectedEOF
	case err == ErrWriteTooLong:
		return n, errMissData // Not possible; implies bug in validation logic
	case err != nil:
		return n, err
	case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0:
		return n, errUnrefData // Not possible; implies bug in validation logic
	default:
		return n, ensureEOF(rs)
	}
}

func (sw sparseFileWriter) LogicalRemaining() int64 {
	// The last fragment's end offset is the logical file size.
	return sw.sp[len(sw.sp)-1].endOffset() - sw.pos
}
func (sw sparseFileWriter) PhysicalRemaining() int64 {
	return sw.fw.PhysicalRemaining()
}

// zeroWriter may only be written with NULs, otherwise it returns errWriteHole.
type zeroWriter struct{}

func (zeroWriter) Write(b []byte) (int, error) {
	for i, c := range b {
		if c != 0 {
			return i, errWriteHole
		}
	}
	return len(b), nil
}

// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so.
func ensureEOF(r io.Reader) error {
	n, err := tryReadFull(r, []byte{0})
	switch {
	case n > 0:
		return ErrWriteTooLong
	case err == io.EOF:
		return nil
	default:
		return err
	}
}
vbatts-tar-split-6881021/archive/tar/writer_test.go000066400000000000000000001110751467537433400222630ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar import ( "bytes" "encoding/hex" "errors" "io" "os" "path" "reflect" "sort" "strings" "testing" "testing/iotest" "time" ) func bytediff(a, b []byte) string { const ( uniqueA = "- " uniqueB = "+ " identity = " " ) var ss []string sa := strings.Split(strings.TrimSpace(hex.Dump(a)), "\n") sb := strings.Split(strings.TrimSpace(hex.Dump(b)), "\n") for len(sa) > 0 && len(sb) > 0 { if sa[0] == sb[0] { ss = append(ss, identity+sa[0]) } else { ss = append(ss, uniqueA+sa[0]) ss = append(ss, uniqueB+sb[0]) } sa, sb = sa[1:], sb[1:] } for len(sa) > 0 { ss = append(ss, uniqueA+sa[0]) sa = sa[1:] } for len(sb) > 0 { ss = append(ss, uniqueB+sb[0]) sb = sb[1:] } return strings.Join(ss, "\n") } func TestWriter(t *testing.T) { type ( testHeader struct { // WriteHeader(hdr) == wantErr hdr Header wantErr error } testWrite struct { // Write(str) == (wantCnt, wantErr) str string wantCnt int wantErr error } testReadFrom struct { // ReadFrom(testFile{ops}) == (wantCnt, wantErr) ops fileOps wantCnt int64 wantErr error } testClose struct { // Close() == wantErr wantErr error } testFnc interface{} // testHeader | testWrite | testReadFrom | testClose ) vectors := []struct { file string // Optional filename of expected output tests []testFnc }{{ // The writer test file was produced with this command: // tar (GNU tar) 1.26 // ln -s small.txt link.txt // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt file: "testdata/writer.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "small.txt", Size: 5, Mode: 0640, Uid: 73025, Gid: 5000, Uname: "dsymonds", Gname: "eng", ModTime: time.Unix(1246508266, 0), }, nil}, testWrite{"Kilts", 5, nil}, testHeader{Header{ Typeflag: TypeReg, Name: "small2.txt", Size: 11, Mode: 0640, Uid: 73025, Uname: "dsymonds", Gname: "eng", Gid: 5000, ModTime: time.Unix(1245217492, 0), }, nil}, testWrite{"Google.com\n", 11, nil}, testHeader{Header{ Typeflag: TypeSymlink, Name: "link.txt", Linkname: "small.txt", Mode: 0777, Uid: 
1000, Gid: 1000, Uname: "strings", Gname: "strings", ModTime: time.Unix(1314603082, 0), }, nil}, testWrite{"", 0, nil}, testClose{nil}, }, }, { // The truncated test file was produced using these commands: // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar file: "testdata/writer-big.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "tmp/16gig.txt", Size: 16 << 30, Mode: 0640, Uid: 73025, Gid: 5000, Uname: "dsymonds", Gname: "eng", ModTime: time.Unix(1254699560, 0), Format: FormatGNU, }, nil}, }, }, { // This truncated file was produced using this library. // It was verified to work with GNU tar 1.27.1 and BSD tar 3.1.2. // dd if=/dev/zero bs=1G count=16 >> writer-big-long.tar // gnutar -xvf writer-big-long.tar // bsdtar -xvf writer-big-long.tar // // This file is in PAX format. file: "testdata/writer-big-long.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: strings.Repeat("longname/", 15) + "16gig.txt", Size: 16 << 30, Mode: 0644, Uid: 1000, Gid: 1000, Uname: "guillaume", Gname: "guillaume", ModTime: time.Unix(1399583047, 0), }, nil}, }, }, { // This file was produced using GNU tar v1.17. 
// gnutar -b 4 --format=ustar (longname/)*15 + file.txt file: "testdata/ustar.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: strings.Repeat("longname/", 15) + "file.txt", Size: 6, Mode: 0644, Uid: 501, Gid: 20, Uname: "shane", Gname: "staff", ModTime: time.Unix(1360135598, 0), }, nil}, testWrite{"hello\n", 6, nil}, testClose{nil}, }, }, { // This file was produced using GNU tar v1.26: // echo "Slartibartfast" > file.txt // ln file.txt hard.txt // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt file: "testdata/hardlink.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "file.txt", Size: 15, Mode: 0644, Uid: 1000, Gid: 100, Uname: "vbatts", Gname: "users", ModTime: time.Unix(1425484303, 0), }, nil}, testWrite{"Slartibartfast\n", 15, nil}, testHeader{Header{ Typeflag: TypeLink, Name: "hard.txt", Linkname: "file.txt", Mode: 0644, Uid: 1000, Gid: 100, Uname: "vbatts", Gname: "users", ModTime: time.Unix(1425484303, 0), }, nil}, testWrite{"", 0, nil}, testClose{nil}, }, }, { tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "bad-null.txt", Xattrs: map[string]string{"null\x00null\x00": "fizzbuzz"}, }, headerError{}}, }, }, { tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "null\x00.txt", }, headerError{}}, }, }, { file: "testdata/pax-records.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "file", Uname: strings.Repeat("long", 10), PAXRecords: map[string]string{ "path": "FILE", // Should be ignored "GNU.sparse.map": "0,0", // Should be ignored "comment": "Hello, 世界", "GOLANG.pkg": "tar", }, }, nil}, testClose{nil}, }, }, { // Craft a theoretically valid PAX archive with global headers. // The GNU and BSD tar tools do not parse these the same way. // // BSD tar v3.1.2 parses and ignores all global headers; // the behavior is verified by researching the source code. 
// // $ bsdtar -tvf pax-global-records.tar // ---------- 0 0 0 0 Dec 31 1969 file1 // ---------- 0 0 0 0 Dec 31 1969 file2 // ---------- 0 0 0 0 Dec 31 1969 file3 // ---------- 0 0 0 0 May 13 2014 file4 // // GNU tar v1.27.1 applies global headers to subsequent records, // but does not do the following properly: // * It does not treat an empty record as deletion. // * It does not use subsequent global headers to update previous ones. // // $ gnutar -tvf pax-global-records.tar // ---------- 0/0 0 2017-07-13 19:40 global1 // ---------- 0/0 0 2017-07-13 19:40 file2 // gnutar: Substituting `.' for empty member name // ---------- 0/0 0 1969-12-31 16:00 // gnutar: Substituting `.' for empty member name // ---------- 0/0 0 2014-05-13 09:53 // // According to the PAX specification, this should have been the result: // ---------- 0/0 0 2017-07-13 19:40 global1 // ---------- 0/0 0 2017-07-13 19:40 file2 // ---------- 0/0 0 2017-07-13 19:40 file3 // ---------- 0/0 0 2014-05-13 09:53 file4 file: "testdata/pax-global-records.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeXGlobalHeader, PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"}, }, nil}, testHeader{Header{ Typeflag: TypeReg, Name: "file1", }, nil}, testHeader{Header{ Typeflag: TypeReg, Name: "file2", PAXRecords: map[string]string{"path": "file2"}, }, nil}, testHeader{Header{ Typeflag: TypeXGlobalHeader, PAXRecords: map[string]string{"path": ""}, // Should delete "path", but keep "mtime" }, nil}, testHeader{Header{ Typeflag: TypeReg, Name: "file3", }, nil}, testHeader{Header{ Typeflag: TypeReg, Name: "file4", ModTime: time.Unix(1400000000, 0), PAXRecords: map[string]string{"mtime": "1400000000"}, }, nil}, testClose{nil}, }, }, { file: "testdata/gnu-utf8.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", Mode: 0644, Uid: 1000, Gid: 1000, Uname: "☺", Gname: "⚹", ModTime: time.Unix(0, 0), Format: FormatGNU, }, nil}, 
testClose{nil}, }, }, { file: "testdata/gnu-not-utf8.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "hi\x80\x81\x82\x83bye", Mode: 0644, Uid: 1000, Gid: 1000, Uname: "rawr", Gname: "dsnet", ModTime: time.Unix(0, 0), Format: FormatGNU, }, nil}, testClose{nil}, }, // TODO(dsnet): Re-enable this test when adding sparse support. // See https://golang.org/issue/22735 /* }, { file: "testdata/gnu-nil-sparse-data.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeGNUSparse, Name: "sparse.db", Size: 1000, SparseHoles: []sparseEntry{{Offset: 1000, Length: 0}}, }, nil}, testWrite{strings.Repeat("0123456789", 100), 1000, nil}, testClose{}, }, }, { file: "testdata/gnu-nil-sparse-hole.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeGNUSparse, Name: "sparse.db", Size: 1000, SparseHoles: []sparseEntry{{Offset: 0, Length: 1000}}, }, nil}, testWrite{strings.Repeat("\x00", 1000), 1000, nil}, testClose{}, }, }, { file: "testdata/pax-nil-sparse-data.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "sparse.db", Size: 1000, SparseHoles: []sparseEntry{{Offset: 1000, Length: 0}}, }, nil}, testWrite{strings.Repeat("0123456789", 100), 1000, nil}, testClose{}, }, }, { file: "testdata/pax-nil-sparse-hole.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "sparse.db", Size: 1000, SparseHoles: []sparseEntry{{Offset: 0, Length: 1000}}, }, nil}, testWrite{strings.Repeat("\x00", 1000), 1000, nil}, testClose{}, }, }, { file: "testdata/gnu-sparse-big.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeGNUSparse, Name: "gnu-sparse", Size: 6e10, SparseHoles: []sparseEntry{ {Offset: 0e10, Length: 1e10 - 100}, {Offset: 1e10, Length: 1e10 - 100}, {Offset: 2e10, Length: 1e10 - 100}, {Offset: 3e10, Length: 1e10 - 100}, {Offset: 4e10, Length: 1e10 - 100}, {Offset: 5e10, Length: 1e10 - 100}, }, }, nil}, testReadFrom{fileOps{ int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 
- blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), }, 6e10, nil}, testClose{nil}, }, }, { file: "testdata/pax-sparse-big.tar", tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "pax-sparse", Size: 6e10, SparseHoles: []sparseEntry{ {Offset: 0e10, Length: 1e10 - 100}, {Offset: 1e10, Length: 1e10 - 100}, {Offset: 2e10, Length: 1e10 - 100}, {Offset: 3e10, Length: 1e10 - 100}, {Offset: 4e10, Length: 1e10 - 100}, {Offset: 5e10, Length: 1e10 - 100}, }, }, nil}, testReadFrom{fileOps{ int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), int64(1e10 - blockSize), strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), }, 6e10, nil}, testClose{nil}, }, */ }, { file: "testdata/trailing-slash.tar", tests: []testFnc{ testHeader{Header{Name: strings.Repeat("123456789/", 30)}, nil}, testClose{nil}, }, }, { // Automatically promote zero value of Typeflag depending on the name. 
file: "testdata/file-and-dir.tar", tests: []testFnc{ testHeader{Header{Name: "small.txt", Size: 5}, nil}, testWrite{"Kilts", 5, nil}, testHeader{Header{Name: "dir/"}, nil}, testClose{nil}, }, }} equalError := func(x, y error) bool { _, ok1 := x.(headerError) _, ok2 := y.(headerError) if ok1 || ok2 { return ok1 && ok2 } return x == y } for _, v := range vectors { t.Run(path.Base(v.file), func(t *testing.T) { const maxSize = 10 << 10 // 10KiB buf := new(bytes.Buffer) tw := NewWriter(iotest.TruncateWriter(buf, maxSize)) for i, tf := range v.tests { switch tf := tf.(type) { case testHeader: err := tw.WriteHeader(&tf.hdr) if !equalError(err, tf.wantErr) { t.Fatalf("test %d, WriteHeader() = %v, want %v", i, err, tf.wantErr) } case testWrite: got, err := tw.Write([]byte(tf.str)) if got != tf.wantCnt || !equalError(err, tf.wantErr) { t.Fatalf("test %d, Write() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) } case testReadFrom: f := &testFile{ops: tf.ops} got, err := tw.readFrom(f) if _, ok := err.(testError); ok { t.Errorf("test %d, ReadFrom(): %v", i, err) } else if got != tf.wantCnt || !equalError(err, tf.wantErr) { t.Errorf("test %d, ReadFrom() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) } if len(f.ops) > 0 { t.Errorf("test %d, expected %d more operations", i, len(f.ops)) } case testClose: err := tw.Close() if !equalError(err, tf.wantErr) { t.Fatalf("test %d, Close() = %v, want %v", i, err, tf.wantErr) } default: t.Fatalf("test %d, unknown test operation: %T", i, tf) } } if v.file != "" { want, err := os.ReadFile(v.file) if err != nil { t.Fatalf("ReadFile() = %v, want nil", err) } got := buf.Bytes() if !bytes.Equal(want, got) { t.Fatalf("incorrect result: (-got +want)\n%v", bytediff(got, want)) } } }) } } func TestPax(t *testing.T) { // Create an archive with a large name fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") if err != nil { t.Fatalf("os.Stat: %v", 
err) } // Force a PAX long name to be written longName := strings.Repeat("ab", 100) contents := strings.Repeat(" ", int(hdr.Size)) hdr.Name = longName var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if _, err = writer.Write([]byte(contents)); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if hdr.Name != longName { t.Fatal("Couldn't recover long file name") } } func TestPaxSymlink(t *testing.T) { // Create an archive with a large linkname fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") hdr.Typeflag = TypeSymlink if err != nil { t.Fatalf("os.Stat:1 %v", err) } // Force a PAX long linkname to be written longLinkname := strings.Repeat("1234567890/1234567890", 10) hdr.Linkname = longLinkname hdr.Size = 0 var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if hdr.Linkname != longLinkname { t.Fatal("Couldn't recover long link name") } } func TestPaxNonAscii(t *testing.T) { // Create an archive with non ascii. These should trigger a pax header // because pax headers have a defined utf-8 encoding. 
fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") if err != nil { t.Fatalf("os.Stat:1 %v", err) } // some sample data chineseFilename := "文件名" chineseGroupname := "組" chineseUsername := "用戶名" hdr.Name = chineseFilename hdr.Gname = chineseGroupname hdr.Uname = chineseUsername contents := strings.Repeat(" ", int(hdr.Size)) var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if _, err = writer.Write([]byte(contents)); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // Test that we can get a long name back out of the archive. reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if hdr.Name != chineseFilename { t.Fatal("Couldn't recover unicode name") } if hdr.Gname != chineseGroupname { t.Fatal("Couldn't recover unicode group") } if hdr.Uname != chineseUsername { t.Fatal("Couldn't recover unicode user") } } func TestPaxXattrs(t *testing.T) { xattrs := map[string]string{ "user.key": "value", } // Create an archive with an xattr fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") if err != nil { t.Fatalf("os.Stat: %v", err) } contents := "Kilts" hdr.Xattrs = xattrs var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if _, err = writer.Write([]byte(contents)); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Test that we can get the xattrs back out of the archive. 
reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if !reflect.DeepEqual(hdr.Xattrs, xattrs) { t.Fatalf("xattrs did not survive round trip: got %+v, want %+v", hdr.Xattrs, xattrs) } } func TestPaxHeadersSorted(t *testing.T) { fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") if err != nil { t.Fatalf("os.Stat: %v", err) } contents := strings.Repeat(" ", int(hdr.Size)) hdr.Xattrs = map[string]string{ "foo": "foo", "bar": "bar", "baz": "baz", "qux": "qux", } var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if _, err = writer.Write([]byte(contents)); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Simple test to make sure PAX extensions are in effect if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { t.Fatal("Expected at least one PAX header to be written.") } // xattr bar should always appear before others indices := []int{ bytes.Index(buf.Bytes(), []byte("bar=bar")), bytes.Index(buf.Bytes(), []byte("baz=baz")), bytes.Index(buf.Bytes(), []byte("foo=foo")), bytes.Index(buf.Bytes(), []byte("qux=qux")), } if !sort.IntsAreSorted(indices) { t.Fatal("PAX headers are not sorted") } } func TestUSTARLongName(t *testing.T) { // Create an archive with a path that failed to split with USTAR extension in previous versions. fileinfo, err := os.Stat("testdata/small.txt") if err != nil { t.Fatal(err) } hdr, err := FileInfoHeader(fileinfo, "") hdr.Typeflag = TypeDir if err != nil { t.Fatalf("os.Stat:1 %v", err) } // Force a PAX long name to be written. The name was taken from a practical example // that fails and replaced ever char through numbers to anonymize the sample. 
longName := "/0000_0000000/00000-000000000/0000_0000000/00000-0000000000000/0000_0000000/00000-0000000-00000000/0000_0000000/00000000/0000_0000000/000/0000_0000000/00000000v00/0000_0000000/000000/0000_0000000/0000000/0000_0000000/00000y-00/0000/0000/00000000/0x000000/" hdr.Name = longName hdr.Size = 0 var buf bytes.Buffer writer := NewWriter(&buf) if err := writer.WriteHeader(hdr); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } // Test that we can get a long name back out of the archive. reader := NewReader(&buf) hdr, err = reader.Next() if err != nil { t.Fatal(err) } if hdr.Name != longName { t.Fatal("Couldn't recover long name") } } func TestValidTypeflagWithPAXHeader(t *testing.T) { var buffer bytes.Buffer tw := NewWriter(&buffer) fileName := strings.Repeat("ab", 100) hdr := &Header{ Name: fileName, Size: 4, Typeflag: 0, } if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("Failed to write header: %s", err) } if _, err := tw.Write([]byte("fooo")); err != nil { t.Fatalf("Failed to write the file's data: %s", err) } tw.Close() tr := NewReader(&buffer) for { header, err := tr.Next() if err == io.EOF { break } if err != nil { t.Fatalf("Failed to read header: %s", err) } if header.Typeflag != TypeReg { t.Fatalf("Typeflag should've been %d, found %d", TypeReg, header.Typeflag) } } } // failOnceWriter fails exactly once and then always reports success. 
type failOnceWriter bool func (w *failOnceWriter) Write(b []byte) (int, error) { if !*w { return 0, io.ErrShortWrite } *w = true return len(b), nil } func TestWriterErrors(t *testing.T) { t.Run("HeaderOnly", func(t *testing.T) { tw := NewWriter(new(bytes.Buffer)) hdr := &Header{Name: "dir/", Typeflag: TypeDir} if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("WriteHeader() = %v, want nil", err) } if _, err := tw.Write([]byte{0x00}); err != ErrWriteTooLong { t.Fatalf("Write() = %v, want %v", err, ErrWriteTooLong) } }) t.Run("NegativeSize", func(t *testing.T) { tw := NewWriter(new(bytes.Buffer)) hdr := &Header{Name: "small.txt", Size: -1} if err := tw.WriteHeader(hdr); err == nil { t.Fatalf("WriteHeader() = nil, want non-nil error") } }) t.Run("BeforeHeader", func(t *testing.T) { tw := NewWriter(new(bytes.Buffer)) if _, err := tw.Write([]byte("Kilts")); err != ErrWriteTooLong { t.Fatalf("Write() = %v, want %v", err, ErrWriteTooLong) } }) t.Run("AfterClose", func(t *testing.T) { tw := NewWriter(new(bytes.Buffer)) hdr := &Header{Name: "small.txt"} if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("WriteHeader() = %v, want nil", err) } if err := tw.Close(); err != nil { t.Fatalf("Close() = %v, want nil", err) } if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose { t.Fatalf("Write() = %v, want %v", err, ErrWriteAfterClose) } if err := tw.Flush(); err != ErrWriteAfterClose { t.Fatalf("Flush() = %v, want %v", err, ErrWriteAfterClose) } if err := tw.Close(); err != nil { t.Fatalf("Close() = %v, want nil", err) } }) t.Run("PrematureFlush", func(t *testing.T) { tw := NewWriter(new(bytes.Buffer)) hdr := &Header{Name: "small.txt", Size: 5} if err := tw.WriteHeader(hdr); err != nil { t.Fatalf("WriteHeader() = %v, want nil", err) } if err := tw.Flush(); err == nil { t.Fatalf("Flush() = %v, want non-nil error", err) } }) t.Run("PrematureClose", func(t *testing.T) { tw := NewWriter(new(bytes.Buffer)) hdr := &Header{Name: "small.txt", Size: 5} if err := 
tw.WriteHeader(hdr); err != nil { t.Fatalf("WriteHeader() = %v, want nil", err) } if err := tw.Close(); err == nil { t.Fatalf("Close() = %v, want non-nil error", err) } }) t.Run("Persistence", func(t *testing.T) { tw := NewWriter(new(failOnceWriter)) if err := tw.WriteHeader(&Header{}); err != io.ErrShortWrite { t.Fatalf("WriteHeader() = %v, want %v", err, io.ErrShortWrite) } if err := tw.WriteHeader(&Header{Name: "small.txt"}); err == nil { t.Errorf("WriteHeader() = got %v, want non-nil error", err) } if _, err := tw.Write(nil); err == nil { t.Errorf("Write() = %v, want non-nil error", err) } if err := tw.Flush(); err == nil { t.Errorf("Flush() = %v, want non-nil error", err) } if err := tw.Close(); err == nil { t.Errorf("Close() = %v, want non-nil error", err) } }) } func TestSplitUSTARPath(t *testing.T) { sr := strings.Repeat vectors := []struct { input string // Input path prefix string // Expected output prefix suffix string // Expected output suffix ok bool // Split success? }{ {"", "", "", false}, {"abc", "", "", false}, {"用戶名", "", "", false}, {sr("a", nameSize), "", "", false}, {sr("a", nameSize) + "/", "", "", false}, {sr("a", nameSize) + "/a", sr("a", nameSize), "a", true}, {sr("a", prefixSize) + "/", "", "", false}, {sr("a", prefixSize) + "/a", sr("a", prefixSize), "a", true}, {sr("a", nameSize+1), "", "", false}, {sr("/", nameSize+1), sr("/", nameSize-1), "/", true}, {sr("a", prefixSize) + "/" + sr("b", nameSize), sr("a", prefixSize), sr("b", nameSize), true}, {sr("a", prefixSize) + "//" + sr("b", nameSize), "", "", false}, {sr("a/", nameSize), sr("a/", 77) + "a", sr("a/", 22), true}, } for _, v := range vectors { prefix, suffix, ok := splitUSTARPath(v.input) if prefix != v.prefix || suffix != v.suffix || ok != v.ok { t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)", v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok) } } } // TestIssue12594 tests that the Writer does not attempt to populate the prefix // field when encoding a 
header in the GNU format. The prefix field is valid // in USTAR and PAX, but not GNU. func TestIssue12594(t *testing.T) { names := []string{ "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/file.txt", "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/file.txt", "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/333/file.txt", "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/file.txt", "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/file.txt", "/home/support/.openoffice.org/3/user/uno_packages/cache/registry/com.sun.star.comp.deployment.executable.PackageRegistryBackend", } for i, name := range names { var b bytes.Buffer tw := NewWriter(&b) if err := tw.WriteHeader(&Header{ Name: name, Uid: 1 << 25, // Prevent USTAR format }); err != nil { t.Errorf("test %d, unexpected WriteHeader error: %v", i, err) } if err := tw.Close(); err != nil { t.Errorf("test %d, unexpected Close error: %v", i, err) } // The prefix field should never appear in the GNU format. var blk block copy(blk[:], b.Bytes()) prefix := string(blk.USTAR().Prefix()) if i := strings.IndexByte(prefix, 0); i >= 0 { prefix = prefix[:i] // Truncate at the NUL terminator } if blk.GetFormat() == FormatGNU && len(prefix) > 0 && strings.HasPrefix(name, prefix) { t.Errorf("test %d, found prefix in GNU format: %s", i, prefix) } tr := NewReader(&b) hdr, err := tr.Next() if err != nil { t.Errorf("test %d, unexpected Next error: %v", i, err) } if hdr.Name != name { t.Errorf("test %d, hdr.Name = %s, want %s", i, hdr.Name, name) } } } // testNonEmptyWriter wraps an io.Writer and ensures that // Write is never called with an empty buffer. 
type testNonEmptyWriter struct{ io.Writer } func (w testNonEmptyWriter) Write(b []byte) (int, error) { if len(b) == 0 { return 0, errors.New("unexpected empty Write call") } return w.Writer.Write(b) } func TestFileWriter(t *testing.T) { type ( testWrite struct { // Write(str) == (wantCnt, wantErr) str string wantCnt int wantErr error } testReadFrom struct { // ReadFrom(testFile{ops}) == (wantCnt, wantErr) ops fileOps wantCnt int64 wantErr error } testRemaining struct { // LogicalRemaining() == wantLCnt, PhysicalRemaining() == wantPCnt wantLCnt int64 wantPCnt int64 } testFnc interface{} // testWrite | testReadFrom | testRemaining ) type ( makeReg struct { size int64 wantStr string } makeSparse struct { makeReg makeReg sph sparseHoles size int64 } fileMaker interface{} // makeReg | makeSparse ) vectors := []struct { maker fileMaker tests []testFnc }{{ maker: makeReg{0, ""}, tests: []testFnc{ testRemaining{0, 0}, testWrite{"", 0, nil}, testWrite{"a", 0, ErrWriteTooLong}, testReadFrom{fileOps{""}, 0, nil}, testReadFrom{fileOps{"a"}, 0, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeReg{1, "a"}, tests: []testFnc{ testRemaining{1, 1}, testWrite{"", 0, nil}, testWrite{"a", 1, nil}, testWrite{"bcde", 0, ErrWriteTooLong}, testWrite{"", 0, nil}, testReadFrom{fileOps{""}, 0, nil}, testReadFrom{fileOps{"a"}, 0, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeReg{5, "hello"}, tests: []testFnc{ testRemaining{5, 5}, testWrite{"hello", 5, nil}, testRemaining{0, 0}, }, }, { maker: makeReg{5, "\x00\x00\x00\x00\x00"}, tests: []testFnc{ testRemaining{5, 5}, testReadFrom{fileOps{"\x00\x00\x00\x00\x00"}, 5, nil}, testRemaining{0, 0}, }, }, { maker: makeReg{5, "\x00\x00\x00\x00\x00"}, tests: []testFnc{ testRemaining{5, 5}, testReadFrom{fileOps{"\x00\x00\x00\x00\x00extra"}, 5, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeReg{5, "abc\x00\x00"}, tests: []testFnc{ testRemaining{5, 5}, testWrite{"abc", 3, nil}, testRemaining{2, 2}, 
testReadFrom{fileOps{"\x00\x00"}, 2, nil}, testRemaining{0, 0}, }, }, { maker: makeReg{5, "\x00\x00abc"}, tests: []testFnc{ testRemaining{5, 5}, testWrite{"\x00\x00", 2, nil}, testRemaining{3, 3}, testWrite{"abc", 3, nil}, testReadFrom{fileOps{"z"}, 0, ErrWriteTooLong}, testWrite{"z", 0, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testRemaining{8, 5}, testWrite{"ab\x00\x00\x00cde", 8, nil}, testWrite{"a", 0, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testWrite{"ab\x00\x00\x00cdez", 8, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testWrite{"ab\x00", 3, nil}, testRemaining{5, 3}, testWrite{"\x00\x00cde", 5, nil}, testWrite{"a", 0, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testWrite{"ab", 2, nil}, testRemaining{6, 3}, testReadFrom{fileOps{int64(3), "cde"}, 6, nil}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testReadFrom{fileOps{"ab", int64(3), "cde"}, 8, nil}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testReadFrom{fileOps{"ab", int64(3), "cdeX"}, 8, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testReadFrom{fileOps{"ab", int64(3), "cd"}, 7, io.ErrUnexpectedEOF}, testRemaining{1, 0}, }, }, { maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testReadFrom{fileOps{"ab", int64(3), "cde"}, 7, errMissData}, testRemaining{1, 0}, }, }, { maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testReadFrom{fileOps{"ab", int64(3), "cde"}, 8, errUnrefData}, testRemaining{0, 
1}, }, }, { maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testWrite{"ab", 2, nil}, testRemaining{6, 2}, testWrite{"\x00\x00\x00", 3, nil}, testRemaining{3, 2}, testWrite{"cde", 2, errMissData}, testRemaining{1, 0}, }, }, { maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, tests: []testFnc{ testWrite{"ab", 2, nil}, testRemaining{6, 4}, testWrite{"\x00\x00\x00", 3, nil}, testRemaining{3, 4}, testWrite{"cde", 3, errUnrefData}, testRemaining{0, 1}, }, }, { maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, tests: []testFnc{ testRemaining{7, 3}, testWrite{"\x00\x00abc\x00\x00", 7, nil}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, tests: []testFnc{ testRemaining{7, 3}, testReadFrom{fileOps{int64(2), "abc", int64(1), "\x00"}, 7, nil}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{3, ""}, sparseHoles{{0, 2}, {5, 2}}, 7}, tests: []testFnc{ testWrite{"abcdefg", 0, errWriteHole}, }, }, { maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, tests: []testFnc{ testWrite{"\x00\x00abcde", 5, errWriteHole}, }, }, { maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, tests: []testFnc{ testWrite{"\x00\x00abc\x00\x00z", 7, ErrWriteTooLong}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, tests: []testFnc{ testWrite{"\x00\x00", 2, nil}, testRemaining{5, 3}, testWrite{"abc", 3, nil}, testRemaining{2, 0}, testWrite{"\x00\x00", 2, nil}, testRemaining{0, 0}, }, }, { maker: makeSparse{makeReg{2, "ab"}, sparseHoles{{0, 2}, {5, 2}}, 7}, tests: []testFnc{ testWrite{"\x00\x00", 2, nil}, testWrite{"abc", 2, errMissData}, testWrite{"\x00\x00", 0, errMissData}, }, }, { maker: makeSparse{makeReg{4, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, tests: []testFnc{ testWrite{"\x00\x00", 2, nil}, testWrite{"abc", 3, nil}, testWrite{"\x00\x00", 2, errUnrefData}, }, }} for i, v := range vectors { 
var wantStr string bb := new(bytes.Buffer) w := testNonEmptyWriter{bb} var fw fileWriter switch maker := v.maker.(type) { case makeReg: fw = ®FileWriter{w, maker.size} wantStr = maker.wantStr case makeSparse: if !validateSparseEntries(maker.sph, maker.size) { t.Fatalf("invalid sparse map: %v", maker.sph) } spd := invertSparseEntries(maker.sph, maker.size) fw = ®FileWriter{w, maker.makeReg.size} fw = &sparseFileWriter{fw, spd, 0} wantStr = maker.makeReg.wantStr default: t.Fatalf("test %d, unknown make operation: %T", i, maker) } for j, tf := range v.tests { switch tf := tf.(type) { case testWrite: got, err := fw.Write([]byte(tf.str)) if got != tf.wantCnt || err != tf.wantErr { t.Errorf("test %d.%d, Write(%s):\ngot (%d, %v)\nwant (%d, %v)", i, j, tf.str, got, err, tf.wantCnt, tf.wantErr) } case testReadFrom: f := &testFile{ops: tf.ops} got, err := fw.ReadFrom(f) if _, ok := err.(testError); ok { t.Errorf("test %d.%d, ReadFrom(): %v", i, j, err) } else if got != tf.wantCnt || err != tf.wantErr { t.Errorf("test %d.%d, ReadFrom() = (%d, %v), want (%d, %v)", i, j, got, err, tf.wantCnt, tf.wantErr) } if len(f.ops) > 0 { t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops)) } case testRemaining: if got := fw.LogicalRemaining(); got != tf.wantLCnt { t.Errorf("test %d.%d, LogicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt) } if got := fw.PhysicalRemaining(); got != tf.wantPCnt { t.Errorf("test %d.%d, PhysicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt) } default: t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) } } if got := bb.String(); got != wantStr { t.Fatalf("test %d, String() = %q, want %q", i, got, wantStr) } } } 
vbatts-tar-split-6881021/cmd/000077500000000000000000000000001467537433400157105ustar00rootroot00000000000000vbatts-tar-split-6881021/cmd/tar-split/000077500000000000000000000000001467537433400176275ustar00rootroot00000000000000vbatts-tar-split-6881021/cmd/tar-split/README.md000066400000000000000000000016501467537433400211100ustar00rootroot00000000000000# tar-split utility ## Installation go get -u github.com/vbatts/tar-split/cmd/tar-split ## Usage ### Disassembly ```bash $ sha256sum archive.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 archive.tar $ mkdir ./x $ tar-split disasm --output tar-data.json.gz ./archive.tar | tar -C ./x -x time="2015-07-20T15:45:04-04:00" level=info msg="created tar-data.json.gz from ./archive.tar (read 204800 bytes)" ``` ### Assembly ```bash $ tar-split asm --output new.tar --input ./tar-data.json.gz --path ./x/ INFO[0000] created new.tar from ./x/ and ./tar-data.json.gz (wrote 204800 bytes) $ sha256sum new.tar d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar ``` ### Estimating metadata size ```bash $ tar-split checksize ./archive.tar inspecting "./archive.tar" (size 200k) -- number of files: 28 -- size of metadata uncompressed: 28k -- size of gzip compressed metadata: 1k ``` vbatts-tar-split-6881021/cmd/tar-split/asm.go000066400000000000000000000030541467537433400207400ustar00rootroot00000000000000package main import ( "compress/gzip" "io" "os" "github.com/sirupsen/logrus" "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) func CommandAsm(c *cli.Context) { if len(c.Args()) > 0 { logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) } if len(c.String("input")) == 0 { logrus.Fatalf("--input filename must be set") } if len(c.String("output")) == 0 { logrus.Fatalf("--output filename must be set ([FILENAME|-])") } if len(c.String("path")) == 0 { logrus.Fatalf("--path must be set") } var outputStream io.Writer if 
c.String("output") == "-" { outputStream = os.Stdout } else { fh, err := os.Create(c.String("output")) if err != nil { logrus.Fatal(err) } defer fh.Close() outputStream = fh } if c.Bool("compress") { zipper := gzip.NewWriter(outputStream) defer zipper.Close() outputStream = zipper } // Get the tar metadata reader mf, err := os.Open(c.String("input")) if err != nil { logrus.Fatal(err) } defer mf.Close() mfz, err := gzip.NewReader(mf) if err != nil { logrus.Fatal(err) } defer mfz.Close() metaUnpacker := storage.NewJSONUnpacker(mfz) // XXX maybe get the absolute path here fileGetter := storage.NewPathFileGetter(c.String("path")) ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) defer ots.Close() i, err := io.Copy(outputStream, ots) if err != nil { logrus.Fatal(err) } logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) } vbatts-tar-split-6881021/cmd/tar-split/checksize.go000066400000000000000000000041221467537433400221250ustar00rootroot00000000000000package main import ( "archive/tar" "compress/gzip" "fmt" "io" "log" "os" "github.com/sirupsen/logrus" "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) func CommandChecksize(c *cli.Context) { if len(c.Args()) == 0 { logrus.Fatalf("please specify tar archives to check ('-' will check stdin)") } for _, arg := range c.Args() { fh, err := os.Open(arg) if err != nil { log.Fatal(err) } defer fh.Close() fi, err := fh.Stat() if err != nil { log.Fatal(err) } fmt.Printf("inspecting %q (size %dk)\n", fh.Name(), fi.Size()/1024) packFh, err := os.CreateTemp("", "packed.") if err != nil { log.Fatal(err) } defer packFh.Close() if !c.Bool("work") { defer os.Remove(packFh.Name()) } else { fmt.Printf(" -- working file preserved: %s\n", packFh.Name()) } sp := storage.NewJSONPacker(packFh) fp := storage.NewDiscardFilePutter() dissam, err := asm.NewInputTarStream(fh, sp, fp) if err != nil { log.Fatal(err) } var num 
int tr := tar.NewReader(dissam) for { _, err = tr.Next() if err != nil { if err == io.EOF { break } log.Fatal(err) } num++ if _, err := io.Copy(io.Discard, tr); err != nil { log.Fatal(err) } } fmt.Printf(" -- number of files: %d\n", num) if err := packFh.Sync(); err != nil { log.Fatal(err) } fi, err = packFh.Stat() if err != nil { log.Fatal(err) } fmt.Printf(" -- size of metadata uncompressed: %dk\n", fi.Size()/1024) gzPackFh, err := os.CreateTemp("", "packed.gz.") if err != nil { log.Fatal(err) } defer gzPackFh.Close() if !c.Bool("work") { defer os.Remove(gzPackFh.Name()) } gzWrtr := gzip.NewWriter(gzPackFh) if _, err := packFh.Seek(0, 0); err != nil { log.Fatal(err) } if _, err := io.Copy(gzWrtr, packFh); err != nil { log.Fatal(err) } gzWrtr.Close() if err := gzPackFh.Sync(); err != nil { log.Fatal(err) } fi, err = gzPackFh.Stat() if err != nil { log.Fatal(err) } fmt.Printf(" -- size of gzip compressed metadata: %dk\n", fi.Size()/1024) } } vbatts-tar-split-6881021/cmd/tar-split/disasm.go000066400000000000000000000025671467537433400214500ustar00rootroot00000000000000package main import ( "compress/gzip" "io" "os" "github.com/sirupsen/logrus" "github.com/urfave/cli" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) func CommandDisasm(c *cli.Context) { if len(c.Args()) != 1 { logrus.Fatalf("please specify tar to be disabled ") } if len(c.String("output")) == 0 { logrus.Fatalf("--output filename must be set") } // Set up the tar input stream var inputStream io.Reader if c.Args()[0] == "-" { inputStream = os.Stdin } else { fh, err := os.Open(c.Args()[0]) if err != nil { logrus.Fatal(err) } defer fh.Close() inputStream = fh } // Set up the metadata storage mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) if err != nil { logrus.Fatal(err) } defer mf.Close() mfz := gzip.NewWriter(mf) defer mfz.Close() metaPacker := storage.NewJSONPacker(mfz) // we're passing nil here for the file putter, 
because the ApplyDiff will // handle the extraction of the archive its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) if err != nil { logrus.Fatal(err) } var out io.Writer if c.Bool("no-stdout") { out = io.Discard } else { out = os.Stdout } i, err := io.Copy(out, its) if err != nil { logrus.Fatal(err) } logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) } vbatts-tar-split-6881021/cmd/tar-split/main.go000066400000000000000000000037251467537433400211110ustar00rootroot00000000000000package main import ( "os" "github.com/sirupsen/logrus" "github.com/urfave/cli" ) var Version = "v0.11.3" func main() { app := cli.NewApp() app.Name = "tar-split" app.Usage = "tar assembly and disassembly utility" app.Version = Version app.Author = "Vincent Batts" app.Email = "vbatts@hashbangbash.com" app.Action = cli.ShowAppHelp app.Before = func(c *cli.Context) error { logrus.SetOutput(os.Stderr) if c.Bool("debug") { logrus.SetLevel(logrus.DebugLevel) } return nil } app.Flags = []cli.Flag{ cli.BoolFlag{ Name: "debug, D", Usage: "debug output", // defaults to false }, } app.Commands = []cli.Command{ { Name: "disasm", Aliases: []string{"d"}, Usage: "disassemble the input tar stream", Action: CommandDisasm, Flags: []cli.Flag{ cli.StringFlag{ Name: "output", Value: "tar-data.json.gz", Usage: "output of disassembled tar stream", }, cli.BoolFlag{ Name: "no-stdout", Usage: "do not throughput the stream to STDOUT", }, }, }, { Name: "asm", Aliases: []string{"a"}, Usage: "assemble tar stream", Action: CommandAsm, Flags: []cli.Flag{ cli.StringFlag{ Name: "input", Value: "tar-data.json.gz", Usage: "input of disassembled tar stream", }, cli.StringFlag{ Name: "output", Value: "-", Usage: "reassembled tar archive", }, cli.StringFlag{ Name: "path", Value: "", Usage: "relative path of extracted tar", }, cli.BoolFlag{ Name: "compress", Usage: "gzip compress the output", // defaults to false }, }, }, { Name: "checksize", Usage: "displays size estimates for 
metadata storage of a Tar archive", Action: CommandChecksize, Flags: []cli.Flag{ cli.BoolFlag{ Name: "work", Usage: "do not delete the working directory", // defaults to false }, }, }, } if err := app.Run(os.Args); err != nil { logrus.Fatal(err) } } vbatts-tar-split-6881021/cmd/tar-split/tar_benchmark_test.go000066400000000000000000000032701467537433400240170ustar00rootroot00000000000000package main import ( "io" "os" "testing" upTar "archive/tar" ourTar "github.com/vbatts/tar-split/archive/tar" ) var testfile = "../../archive/tar/testdata/sparse-formats.tar" func BenchmarkUpstreamTar(b *testing.B) { for n := 0; n < b.N; n++ { fh, err := os.Open(testfile) if err != nil { b.Fatal(err) } tr := upTar.NewReader(fh) for { _, err := tr.Next() if err != nil { if err == io.EOF { break } fh.Close() b.Fatal(err) } _, err = io.Copy(io.Discard, tr) if err != nil { b.Fatal(err) } } if err := fh.Close(); err != nil { b.Fatal(err) } } } func BenchmarkOurTarNoAccounting(b *testing.B) { for n := 0; n < b.N; n++ { fh, err := os.Open(testfile) if err != nil { b.Fatal(err) } tr := ourTar.NewReader(fh) tr.RawAccounting = false // this is default, but explicit here for { _, err := tr.Next() if err != nil { if err == io.EOF { break } fh.Close() b.Fatal(err) } _, err = io.Copy(io.Discard, tr) if err != nil { b.Fatal(err) } } if err := fh.Close(); err != nil { b.Fatal(err) } } } func BenchmarkOurTarYesAccounting(b *testing.B) { for n := 0; n < b.N; n++ { fh, err := os.Open(testfile) if err != nil { b.Fatal(err) } tr := ourTar.NewReader(fh) tr.RawAccounting = true // This enables mechanics for collecting raw bytes for { _ = tr.RawBytes() _, err := tr.Next() _ = tr.RawBytes() if err != nil { if err == io.EOF { break } fh.Close() b.Fatal(err) } _, err = io.Copy(io.Discard, tr) if err != nil { b.Fatal(err) } _ = tr.RawBytes() } if err := fh.Close(); err != nil { b.Fatal(err) } } } 
vbatts-tar-split-6881021/concept/000077500000000000000000000000001467537433400166005ustar00rootroot00000000000000vbatts-tar-split-6881021/concept/DESIGN.md000066400000000000000000000061571467537433400201040ustar00rootroot00000000000000# Flow of TAR stream ## `./archive/tar` The import path `github.com/vbatts/tar-split/archive/tar` is fork of upstream golang stdlib [`archive/tar`](http://golang.org/pkg/archive/tar/). It adds plumbing to access raw bytes of the tar stream as the headers and payload are read. ## Packer interface For ease of storage and usage of the raw bytes, there will be a storage interface, that accepts an io.Writer (This way you could pass it an in memory buffer or a file handle). Having a Packer interface can allow configuration of hash.Hash for file payloads and providing your own io.Writer. Instead of having a state directory to store all the header information for all Readers, we will leave that up to user of Reader. Because we can not assume an ID for each Reader, and keeping that information differentiated. ## State Directory Perhaps we could deduplicate the header info, by hashing the rawbytes and storing them in a directory tree like: ./ac/dc/beef Then reference the hash of the header info, in the positional records for the tar stream. Though this could be a future feature, and not required for an initial implementation. Also, this would imply an owned state directory, rather than just writing storage info to an io.Writer. ## Concept Example First we'll get an archive to work with. For repeatability, we'll make an archive from what you've just cloned: ``` git archive --format=tar -o tar-split.tar HEAD . 
``` Then build the example main.go: ``` go build ./main.go ``` Now run the example over the archive: ``` $ ./main tar-split.tar 2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out" pax_global_header pre: 512 read: 52 .travis.yml pre: 972 read: 374 DESIGN.md pre: 650 read: 1131 LICENSE pre: 917 read: 1075 README.md pre: 973 read: 4289 archive/ pre: 831 read: 0 archive/tar/ pre: 512 read: 0 archive/tar/common.go pre: 512 read: 7790 [...] tar/storage/entry_test.go pre: 667 read: 1137 tar/storage/getter.go pre: 911 read: 2741 tar/storage/getter_test.go pre: 843 read: 1491 tar/storage/packer.go pre: 557 read: 3141 tar/storage/packer_test.go pre: 955 read: 3096 EOF padding: 1512 Remainder: 512 Size: 215040; Sum: 215040 ``` *What are we seeing here?* * `pre` is the header of a file entry, and potentially the padding from the end of the prior file's payload. Also with particular tar extensions and pax attributes, the header can exceed 512 bytes. * `read` is the size of the file payload from the entry * `EOF padding` is the expected 1024 null bytes on the end of a tar archive, plus potential padding from the end of the prior file entry's payload * `Remainder` is the remaining bytes of an archive. This is typically deadspace as most tar implmentations will return after having reached the end of the 1024 null bytes. Though various implementations will include some amount of bytes here, which will affect the checksum of the resulting tar archive, therefore this must be accounted for as well. 
Ideally the input tar and output `*.out`, will match: ``` $ sha1sum tar-split.tar* ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out ``` vbatts-tar-split-6881021/concept/main.go000066400000000000000000000036431467537433400200610ustar00rootroot00000000000000//go:build ignore // +build ignore package main import ( "flag" "fmt" "io" "log" "os" "github.com/vbatts/tar-split/archive/tar" ) func main() { flag.Parse() log.SetOutput(os.Stderr) for _, arg := range flag.Args() { func() { // Open the tar archive fh, err := os.Open(arg) if err != nil { log.Fatal(err, arg) } defer fh.Close() output, err := os.Create(fmt.Sprintf("%s.out", arg)) if err != nil { log.Fatal(err) } defer output.Close() log.Printf("writing %q to %q", fh.Name(), output.Name()) fi, err := fh.Stat() if err != nil { log.Fatal(err, fh.Name()) } size := fi.Size() var sum int64 tr := tar.NewReader(fh) tr.RawAccounting = true for { hdr, err := tr.Next() if err != nil { if err != io.EOF { log.Println(err) } // even when an EOF is reached, there is often 1024 null bytes on // the end of an archive. Collect them too. 
post := tr.RawBytes() output.Write(post) sum += int64(len(post)) fmt.Printf("EOF padding: %d\n", len(post)) break } pre := tr.RawBytes() output.Write(pre) sum += int64(len(pre)) var i int64 if i, err = io.Copy(output, tr); err != nil { log.Println(err) break } sum += i fmt.Println(hdr.Name, "pre:", len(pre), "read:", i) } // it is allowable, and not uncommon that there is further padding on the // end of an archive, apart from the expected 1024 null bytes remainder, err := io.ReadAll(fh) if err != nil && err != io.EOF { log.Fatal(err, fh.Name()) } output.Write(remainder) sum += int64(len(remainder)) fmt.Printf("Remainder: %d\n", len(remainder)) if size != sum { fmt.Printf("Size: %d; Sum: %d; Diff: %d\n", size, sum, size-sum) fmt.Printf("Compare like `cmp -bl %s %s | less`\n", fh.Name(), output.Name()) } else { fmt.Printf("Size: %d; Sum: %d\n", size, sum) } }() } } vbatts-tar-split-6881021/go.mod000066400000000000000000000011521467537433400162520ustar00rootroot00000000000000module github.com/vbatts/tar-split go 1.17 require ( github.com/fatih/color v1.15.0 github.com/magefile/mage v1.14.0 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 github.com/urfave/cli v1.22.15 ) require ( github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.17 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect golang.org/x/sys v0.25.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) vbatts-tar-split-6881021/go.sum000066400000000000000000000076101467537433400163040ustar00rootroot00000000000000github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= 
github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo= github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2/go.mod 
h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/urfave/cli v1.22.15 h1:nuqt+pdC/KqswQKhETJjo7pvn/k4xMUxgW6liI7XpnM= github.com/urfave/cli v1.22.15/go.mod h1:wSan1hmo5zeyLGBjRJbzRTNk8gwoYa2B9n4q9dmRIc0= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= vbatts-tar-split-6881021/mage.go000066400000000000000000000001761467537433400164110ustar00rootroot00000000000000// +build ignore package main import ( "os" "github.com/magefile/mage/mage" ) func main() { os.Exit(mage.Main()) } 
vbatts-tar-split-6881021/mage_color.go000066400000000000000000000007521467537433400176070ustar00rootroot00000000000000//go:build mage // +build mage package main import ( "io" "os" "github.com/fatih/color" ) var ( ourStdout = cw{c: color.New(color.FgGreen), o: os.Stdout} ourStderr = cw{c: color.New(color.FgRed), o: os.Stderr} ) // hack around color.Color not implementing Write() type cw struct { c *color.Color o io.Writer } func (cw cw) Write(p []byte) (int, error) { i := len(p) _, err := cw.c.Fprint(cw.o, string(p)) // discarding the number of bytes written for now... return i, err } vbatts-tar-split-6881021/magefile.go000066400000000000000000000063101467537433400172450ustar00rootroot00000000000000//go:build mage // +build mage package main import ( "errors" "fmt" "os" "os/exec" "time" "github.com/magefile/mage/mg" // mg contains helpful utility functions, like Deps ) var ( // Default target to run when none is specified // If not set, running mage will list available targets Default = Build app string = "tar-split" Stdout = ourStdout Stderr = ourStderr golangcilintVersion = "v1.51.2" cleanFiles = []string{} ) // Run all-the-things func All() error { mg.Deps(Vet) mg.Deps(Test) mg.Deps(Build) mg.Deps(Lint) return nil } // A build step that requires additional params, or platform specific steps for example func Build() error { mg.Deps(InstallDeps) fmt.Println("Building...") cmd := exec.Command("go", "build", "-v", "-o", app, "./cmd/tar-split") cmd.Stdout = Stdout cmd.Stderr = Stderr return cmd.Run() } // Vet the codes func Vet() error { fmt.Println("go vet...") cmd := exec.Command("go", "vet", "./...") cmd.Stdout = Stdout cmd.Stderr = Stderr return cmd.Run() } // Run the Linters func Lint() error { mg.Deps(InstallToolsLint) fmt.Println("Linting...") cmd := exec.Command("golangci-lint", "run") cmd.Stdout = Stdout cmd.Stderr = Stderr return cmd.Run() } // Run the tests available func Test() error { fmt.Println("Testing...") cmd := exec.Command("go", "test", "-cover", 
"-v", "-bench", "'.'", "-benchmem", "./...") cmd.Stdout = Stdout cmd.Stderr = Stderr return cmd.Run() } // A custom install step if you need your bin someplace other than go/bin func Install() error { mg.Deps(Build) fmt.Println("Installing...") return os.Rename(app, "/usr/local/bin/"+app) } func init() { cleanFiles = append(cleanFiles, ".install.deps") // sloppy } // Manage your deps, or running package managers. func InstallDeps() error { const fpath = ".install.deps" success := false defer func() { if success { fd, err := os.Create(fpath) if err != nil { fmt.Fprintln(os.Stderr, err) } fd.Close() } }() if IsFresh(fpath, time.Now()) { return nil } mg.Deps(Tidy) fmt.Println("Installing Deps...") cmd := exec.Command("go", "get", "./...") cmd.Stdout = Stdout cmd.Stderr = Stderr err := cmd.Run() if err != nil { return err } success = true return nil } // Tools used during build/dev/test func InstallTools() error { mg.Deps(InstallToolsLint) return nil } func InstallToolsLint() error { fmt.Println("Installing Deps...") cmd := exec.Command("go", "install", "github.com/golangci/golangci-lint/cmd/golangci-lint@"+golangcilintVersion) cmd.Stdout = Stdout cmd.Stderr = Stderr return cmd.Run() } // Tidy go modules func Tidy() error { fmt.Println("Tidy up...") cmd := exec.Command("go", "mod", "tidy") cmd.Stdout = Stdout cmd.Stderr = Stderr return cmd.Run() } // Clean up after yourself func Clean() { fmt.Println("Cleaning...") os.RemoveAll(app) for _, fpath := range cleanFiles { os.RemoveAll(fpath) } } // IsFresh checks if `fpath` exists (therefore `false`, it is not fresh) or if // `fpath` is _newer_ than `t` (true, as in it's freshly built) func IsFresh(fpath string, t time.Time) bool { fi, err := os.Stat(fpath) if err != nil && errors.Is(err, os.ErrNotExist) { return false } return fi.ModTime().Before(t) } 
vbatts-tar-split-6881021/tar/000077500000000000000000000000001467537433400157335ustar00rootroot00000000000000vbatts-tar-split-6881021/tar/asm/000077500000000000000000000000001467537433400165135ustar00rootroot00000000000000vbatts-tar-split-6881021/tar/asm/README.md000066400000000000000000000030121467537433400177660ustar00rootroot00000000000000asm === This library for assembly and disassembly of tar archives, facilitated by `github.com/vbatts/tar-split/tar/storage`. Concerns -------- For completely safe assembly/disassembly, there will need to be a Content Addressable Storage (CAS) directory, that maps to a checksum in the `storage.Entity` of `storage.FileType`. This is due to the fact that tar archives _can_ allow multiple records for the same path, but the last one effectively wins. Even if the prior records had a different payload. In this way, when assembling an archive from relative paths, if the archive has multiple entries for the same path, then all payloads read in from a relative path would be identical. Thoughts -------- Have a look-aside directory or storage. This way when a clobbering record is encountered from the tar stream, then the payload of the prior/existing file is stored to the CAS. This way the clobbering record's file payload can be extracted, but we'll have preserved the payload needed to reassemble a precise tar archive. clobbered/path/to/file.[0-N] *alternatively* We could just _not_ support tar streams that have clobbering file paths. Appending records to the archive is not incredibly common, and doesn't happen by default for most implementations. Not supporting them wouldn't be a security concern either, as if it did occur, we would reassemble an archive that doesn't validate signature/checksum, so it shouldn't be trusted anyway. Otherwise, this will allow us to defer support for appended files as a FUTURE FEATURE. 
vbatts-tar-split-6881021/tar/asm/assemble.go000066400000000000000000000061531467537433400206420ustar00rootroot00000000000000package asm import ( "bytes" "fmt" "hash" "hash/crc64" "io" "sync" "github.com/vbatts/tar-split/tar/storage" ) // NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive // stream. // // It takes a storage.FileGetter, for mapping the file payloads that are to be read in, // and a storage.Unpacker, which has access to the rawbytes and file order // metadata. With the combination of these two items, a precise assembled Tar // archive is possible. func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser { // ... Since these are interfaces, this is possible, so let's not have a nil pointer if fg == nil || up == nil { return nil } pr, pw := io.Pipe() go func() { err := WriteOutputTarStream(fg, up, pw) if err != nil { pw.CloseWithError(err) } else { pw.Close() } }() return pr } // WriteOutputTarStream writes assembled tar archive to a writer. func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error { // ... 
Since these are interfaces, this is possible, so let's not have a nil pointer if fg == nil || up == nil { return nil } var copyBuffer []byte var crcHash hash.Hash var crcSum []byte var multiWriter io.Writer for { entry, err := up.Next() if err != nil { if err == io.EOF { return nil } return err } switch entry.Type { case storage.SegmentType: if _, err := w.Write(entry.Payload); err != nil { return err } case storage.FileType: if entry.Size == 0 { continue } fh, err := fg.Get(entry.GetName()) if err != nil { return err } if crcHash == nil { crcHash = crc64.New(storage.CRCTable) crcSum = make([]byte, 8) multiWriter = io.MultiWriter(w, crcHash) copyBuffer = byteBufferPool.Get().([]byte) // TODO once we have some benchmark or memory profile then we can experiment with using *bytes.Buffer //nolint:staticcheck // SA6002 not going to do a pointer here defer byteBufferPool.Put(copyBuffer) } else { crcHash.Reset() } if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil { fh.Close() return err } if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) { // I would rather this be a comparable ErrInvalidChecksum or such, // but since it's coming through the PipeReader, the context of // _which_ file would be lost... 
fh.Close() return fmt.Errorf("file integrity checksum failed for %q", entry.GetName()) } fh.Close() } } } var byteBufferPool = &sync.Pool{ New: func() interface{} { return make([]byte, 32*1024) }, } // copyWithBuffer is taken from stdlib io.Copy implementation // https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367 func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { for { nr, er := src.Read(buf) if nr > 0 { nw, ew := dst.Write(buf[0:nr]) if nw > 0 { written += int64(nw) } if ew != nil { err = ew break } if nr != nw { err = io.ErrShortWrite break } } if er == io.EOF { break } if er != nil { err = er break } } return written, err } vbatts-tar-split-6881021/tar/asm/assemble_test.go000066400000000000000000000144051467537433400217000ustar00rootroot00000000000000package asm import ( "bytes" "compress/gzip" "crypto/sha1" "fmt" "hash/crc64" "io" "os" "testing" "github.com/vbatts/tar-split/tar/storage" ) var entries = []struct { Entry storage.Entry Body []byte }{ { Entry: storage.Entry{ Type: storage.FileType, Name: "./hurr.txt", Payload: []byte{2, 116, 164, 177, 171, 236, 107, 78}, Size: 20, }, Body: []byte("imma hurr til I derp"), }, { Entry: storage.Entry{ Type: storage.FileType, Name: "./ermahgerd.txt", Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, Body: []byte("café con leche, por favor"), }, { Entry: storage.Entry{ Type: storage.FileType, NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, // this is invalid UTF-8. Just checking the round trip. 
Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, Body: []byte("café con leche, por favor"), }, } var entriesMangled = []struct { Entry storage.Entry Body []byte }{ { Entry: storage.Entry{ Type: storage.FileType, Name: "./hurr.txt", Payload: []byte{3, 116, 164, 177, 171, 236, 107, 78}, Size: 20, }, // switch Body: []byte("imma derp til I hurr"), }, { Entry: storage.Entry{ Type: storage.FileType, Name: "./ermahgerd.txt", Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, // san not con Body: []byte("café sans leche, por favor"), }, { Entry: storage.Entry{ Type: storage.FileType, NameRaw: []byte{0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe4}, Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, Size: 26, }, Body: []byte("café con leche, por favor"), }, } func TestTarStreamMangledGetterPutter(t *testing.T) { fgp := storage.NewBufferFileGetPutter() // first lets prep a GetPutter and Packer for i := range entries { if entries[i].Entry.Type == storage.FileType { j, csum, err := fgp.Put(entries[i].Entry.GetName(), bytes.NewBuffer(entries[i].Body)) if err != nil { t.Error(err) } if j != entries[i].Entry.Size { t.Errorf("size %q: expected %d; got %d", entries[i].Entry.GetName(), entries[i].Entry.Size, j) } if !bytes.Equal(csum, entries[i].Entry.Payload) { t.Errorf("checksum %q: expected %v; got %v", entries[i].Entry.GetName(), entries[i].Entry.Payload, csum) } } } for _, e := range entriesMangled { if e.Entry.Type == storage.FileType { rdr, err := fgp.Get(e.Entry.GetName()) if err != nil { t.Error(err) } c := crc64.New(storage.CRCTable) i, err := io.Copy(c, rdr) if err != nil { t.Fatal(err) } rdr.Close() csum := c.Sum(nil) if bytes.Equal(csum, e.Entry.Payload) { t.Errorf("wrote %d bytes. checksum for %q should not have matched! 
%v", i, e.Entry.GetName(), csum) } } } } var testCases = []struct { path string expectedSHA1Sum string expectedSize int64 }{ {"./testdata/t.tar.gz", "1eb237ff69bca6e22789ecb05b45d35ca307adbd", 10240}, {"./testdata/longlink.tar.gz", "d9f6babe107b7247953dff6b5b5ae31a3a880add", 20480}, {"./testdata/fatlonglink.tar.gz", "8537f03f89aeef537382f8b0bb065d93e03b0be8", 26234880}, {"./testdata/iso-8859.tar.gz", "ddafa51cb03c74ec117ab366ee2240d13bba1ec3", 10240}, {"./testdata/extranils.tar.gz", "e187b4b3e739deaccc257342f4940f34403dc588", 10648}, {"./testdata/notenoughnils.tar.gz", "72f93f41efd95290baa5c174c234f5d4c22ce601", 512}, {"./testdata/1c51fc286aa95d9413226599576bafa38490b1e292375c90de095855b64caea6", "946caa03167a8cc707db6ff9785608b652e631dc", 1024}, } func TestTarStream(t *testing.T) { for _, tc := range testCases { fh, err := os.Open(tc.path) if err != nil { t.Fatal(err) } defer fh.Close() gzRdr, err := gzip.NewReader(fh) if err != nil { t.Fatal(err) } defer gzRdr.Close() // Setup where we'll store the metadata w := bytes.NewBuffer([]byte{}) sp := storage.NewJSONPacker(w) fgp := storage.NewBufferFileGetPutter() // wrap the disassembly stream tarStream, err := NewInputTarStream(gzRdr, sp, fgp) if err != nil { t.Fatal(err) } // get a sum of the stream after it has passed through to ensure it's the same. h0 := sha1.New() i, err := io.Copy(h0, tarStream) if err != nil { t.Fatal(err) } if i != tc.expectedSize { t.Errorf("size of tar: expected %d; got %d", tc.expectedSize, i) } if fmt.Sprintf("%x", h0.Sum(nil)) != tc.expectedSHA1Sum { t.Fatalf("checksum of tar: expected %s; got %x", tc.expectedSHA1Sum, h0.Sum(nil)) } //t.Logf("%s", w.String()) // if we fail, then show the packed info // If we've made it this far, then we'll turn it around and create a tar // stream from the packed metadata and buffered file contents. r := bytes.NewBuffer(w.Bytes()) sup := storage.NewJSONUnpacker(r) // and reuse the fgp that we Put the payloads to. 
rc := NewOutputTarStream(fgp, sup) h1 := sha1.New() i, err = io.Copy(h1, rc) if err != nil { t.Fatal(err) } if i != tc.expectedSize { t.Errorf("size of output tar: expected %d; got %d", tc.expectedSize, i) } if fmt.Sprintf("%x", h1.Sum(nil)) != tc.expectedSHA1Sum { t.Fatalf("checksum of output tar: expected %s; got %x", tc.expectedSHA1Sum, h1.Sum(nil)) } } } func BenchmarkAsm(b *testing.B) { for i := 0; i < b.N; i++ { for _, tc := range testCases { func() { fh, err := os.Open(tc.path) if err != nil { b.Fatal(err) } defer fh.Close() gzRdr, err := gzip.NewReader(fh) if err != nil { b.Fatal(err) } defer gzRdr.Close() // Setup where we'll store the metadata w := bytes.NewBuffer([]byte{}) sp := storage.NewJSONPacker(w) fgp := storage.NewBufferFileGetPutter() // wrap the disassembly stream tarStream, err := NewInputTarStream(gzRdr, sp, fgp) if err != nil { b.Fatal(err) } // read it all to the bit bucket i1, err := io.Copy(io.Discard, tarStream) if err != nil { b.Fatal(err) } r := bytes.NewBuffer(w.Bytes()) sup := storage.NewJSONUnpacker(r) // and reuse the fgp that we Put the payloads to. rc := NewOutputTarStream(fgp, sup) i2, err := io.Copy(io.Discard, rc) if err != nil { b.Fatal(err) } if i1 != i2 { b.Errorf("%s: input(%d) and ouput(%d) byte count didn't match", tc.path, i1, i2) } }() } } } vbatts-tar-split-6881021/tar/asm/disassemble.go000066400000000000000000000101701467537433400213340ustar00rootroot00000000000000package asm import ( "io" "github.com/vbatts/tar-split/archive/tar" "github.com/vbatts/tar-split/tar/storage" ) // NewInputTarStream wraps the Reader stream of a tar archive and provides a // Reader stream of the same. // // In the middle it will pack the segments and file metadata to storage.Packer // `p`. // // The the storage.FilePutter is where payload of files in the stream are // stashed. If this stashing is not needed, you can provide a nil // storage.FilePutter. 
Since the checksumming is still needed, then a default // of NewDiscardFilePutter will be used internally func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) { // What to do here... folks will want their own access to the Reader that is // their tar archive stream, but we'll need that same stream to use our // forked 'archive/tar'. // Perhaps do an io.TeeReader that hands back an io.Reader for them to read // from, and we'll MITM the stream to store metadata. // We'll need a storage.FilePutter too ... // Another concern, whether to do any storage.FilePutter operations, such that we // don't extract any amount of the archive. But then again, we're not making // files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter. // Perhaps we have a DiscardFilePutter that is a bit bucket. // we'll return the pipe reader, since TeeReader does not buffer and will // only read what the outputRdr Read's. Since Tar archives have padding on // the end, we want to be the one reading the padding, even if the user's // `archive/tar` doesn't care. pR, pW := io.Pipe() outputRdr := io.TeeReader(r, pW) // we need a putter that will generate the crc64 sums of file payloads if fp == nil { fp = storage.NewDiscardFilePutter() } go func() { tr := tar.NewReader(outputRdr) tr.RawAccounting = true for { hdr, err := tr.Next() if err != nil { if err != io.EOF { pW.CloseWithError(err) return } // even when an EOF is reached, there is often 1024 null bytes on // the end of an archive. Collect them too. if b := tr.RawBytes(); len(b) > 0 { _, err := p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: b, }) if err != nil { pW.CloseWithError(err) return } } break // not return. We need the end of the reader. } if hdr == nil { break // not return. We need the end of the reader. 
} if b := tr.RawBytes(); len(b) > 0 { _, err := p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: b, }) if err != nil { pW.CloseWithError(err) return } } var csum []byte if hdr.Size > 0 { var err error _, csum, err = fp.Put(hdr.Name, tr) if err != nil { pW.CloseWithError(err) return } } entry := storage.Entry{ Type: storage.FileType, Size: hdr.Size, Payload: csum, } // For proper marshalling of non-utf8 characters entry.SetName(hdr.Name) // File entries added, regardless of size _, err = p.AddEntry(entry) if err != nil { pW.CloseWithError(err) return } if b := tr.RawBytes(); len(b) > 0 { _, err = p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: b, }) if err != nil { pW.CloseWithError(err) return } } } // It is allowable, and not uncommon that there is further padding on // the end of an archive, apart from the expected 1024 null bytes. We // do this in chunks rather than in one go to avoid cases where a // maliciously crafted tar file tries to trick us into reading many GBs // into memory. const paddingChunkSize = 1024 * 1024 var paddingChunk [paddingChunkSize]byte for { var isEOF bool n, err := outputRdr.Read(paddingChunk[:]) if err != nil { if err != io.EOF { pW.CloseWithError(err) return } isEOF = true } if n != 0 { _, err = p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: paddingChunk[:n], }) if err != nil { pW.CloseWithError(err) return } } if isEOF { break } } pW.Close() }() return pR, nil } vbatts-tar-split-6881021/tar/asm/disassemble_test.go000066400000000000000000000027341467537433400224020ustar00rootroot00000000000000package asm import ( "archive/tar" "fmt" "io" "os" "testing" "github.com/vbatts/tar-split/tar/storage" ) // This test failing causes the binary to crash due to memory overcommitment. func TestLargeJunkPadding(t *testing.T) { pR, pW := io.Pipe() // Write a normal tar file into the pipe and then load it full of junk // bytes as padding. 
We have to do this in a goroutine because we can't // store 20GB of junk in-memory. go func() { // Empty archive. tw := tar.NewWriter(pW) if err := tw.Close(); err != nil { pW.CloseWithError(err) return } // Write junk. const ( junkChunkSize = 64 * 1024 * 1024 junkChunkNum = 20 * 16 ) devZero, err := os.Open("/dev/zero") if err != nil { pW.CloseWithError(err) return } defer devZero.Close() for i := 0; i < junkChunkNum; i++ { if i%32 == 0 { fmt.Fprintf(os.Stderr, "[TestLargeJunkPadding] junk chunk #%d/#%d\n", i, junkChunkNum) } if _, err := io.CopyN(pW, devZero, junkChunkSize); err != nil { pW.CloseWithError(err) return } } fmt.Fprintln(os.Stderr, "[TestLargeJunkPadding] junk chunk finished") pW.Close() }() // Disassemble our junk file. nilPacker := storage.NewJSONPacker(io.Discard) rdr, err := NewInputTarStream(pR, nilPacker, nil) if err != nil { t.Fatal(err) } // Copy the entire rdr. _, err = io.Copy(io.Discard, rdr) if err != nil { t.Fatal(err) } // At this point, if we haven't crashed then we are not vulnerable to // CVE-2017-14992. } vbatts-tar-split-6881021/tar/asm/doc.go000066400000000000000000000004301467537433400176040ustar00rootroot00000000000000/* Package asm provides the API for streaming assembly and disassembly of tar archives. Using the `github.com/vbatts/tar-split/tar/storage` for Packing/Unpacking the metadata for a stream, as well as an implementation of Getting/Putting the file entries' payload. 
*/ package asm vbatts-tar-split-6881021/tar/asm/iterate.go000066400000000000000000000033521467537433400205020ustar00rootroot00000000000000package asm import ( "bytes" "fmt" "io" "github.com/vbatts/tar-split/archive/tar" "github.com/vbatts/tar-split/tar/storage" ) // IterateHeaders calls handler for each tar header provided by Unpacker func IterateHeaders(unpacker storage.Unpacker, handler func(hdr *tar.Header) error) error { // We assume about NewInputTarStream: // - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions // - (There is a FileType entry for every tar header, we ignore it) // - Trailing padding of a file, if any, is included in the next SegmentType entry // - At the end, there may be SegmentType entries just for the terminating zero blocks. var pendingPadding int64 = 0 for { tsEntry, err := unpacker.Next() if err != nil { if err == io.EOF { return nil } return fmt.Errorf("reading tar-split entries: %w", err) } switch tsEntry.Type { case storage.SegmentType: payload := tsEntry.Payload if int64(len(payload)) < pendingPadding { return fmt.Errorf("expected %d bytes of padding after previous file, but next SegmentType only has %d bytes", pendingPadding, len(payload)) } payload = payload[pendingPadding:] pendingPadding = 0 tr := tar.NewReader(bytes.NewReader(payload)) hdr, err := tr.Next() if err != nil { if err == io.EOF { // Probably the last entry, but let’s let the unpacker drive that. 
break } return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err) } if err := handler(hdr); err != nil { return err } pendingPadding = tr.ExpectedPadding() case storage.FileType: // Nothing default: return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type) } } } vbatts-tar-split-6881021/tar/asm/iterate_test.go000066400000000000000000000070771467537433400215510ustar00rootroot00000000000000package asm import ( "bytes" "fmt" "io" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/vbatts/tar-split/archive/tar" "github.com/vbatts/tar-split/tar/storage" ) func createTestTarheader(index int, typeFlag byte, size int64) tar.Header { n := (index + 1) * 100 // Use predictable, but distinct, values for all headers res := tar.Header{ Typeflag: typeFlag, Name: fmt.Sprintf("name%d", n), Size: size, Mode: int64(n + 1), Uid: n + 2, Gid: n + 3, Uname: fmt.Sprintf("user%d", n), Gname: fmt.Sprintf("group%d", n), ModTime: time.Unix(int64(n+4), 0), AccessTime: time.Unix(int64(n+5), 0), ChangeTime: time.Unix(int64(n+6), 0), PAXRecords: map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)}, Format: tar.FormatPAX, // We must set a format, in the default one AccessTime and ChangeTime are discarded. 
} switch res.Typeflag { case tar.TypeLink, tar.TypeSymlink: res.Linkname = fmt.Sprintf("link%d", n) case tar.TypeChar, tar.TypeBlock: res.Devmajor = int64(n + 7) res.Devminor = int64(n + 8) } return res } func TestIterateHeaders(t *testing.T) { entries := []struct { typeFlag byte size int64 }{ {tar.TypeReg, 0}, {tar.TypeReg, 1}, {tar.TypeReg, 511}, {tar.TypeReg, 512}, {tar.TypeReg, 513}, {tar.TypeLink, 0}, {tar.TypeSymlink, 0}, {tar.TypeChar, 0}, {tar.TypeBlock, 0}, {tar.TypeDir, 0}, {tar.TypeFifo, 0}, } var tarball bytes.Buffer var expected []tar.Header w := tar.NewWriter(&tarball) for i, e := range entries { hdr := createTestTarheader(i, e.typeFlag, e.size) err := w.WriteHeader(&hdr) require.NoError(t, err) data := make([]byte, e.size) _, err = w.Write(data) require.NoError(t, err) expected = append(expected, hdr) } err := w.Close() require.NoError(t, err) var tarSplit bytes.Buffer tsReader, err := NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter()) require.NoError(t, err) _, err = io.Copy(io.Discard, tsReader) require.NoError(t, err) unpacker := storage.NewJSONUnpacker(&tarSplit) var actual []tar.Header err = IterateHeaders(unpacker, func(hdr *tar.Header) error { actual = append(actual, *hdr) return nil }) require.NoError(t, err) assert.Equal(t, len(expected), len(actual)) for i := range expected { expected := &expected[i] actual := &actual[i] assert.Equal(t, expected.Typeflag, actual.Typeflag) assert.Equal(t, expected.Name, actual.Name) assert.Equal(t, expected.Linkname, actual.Linkname) assert.Equal(t, expected.Size, actual.Size) assert.Equal(t, expected.Mode, actual.Mode) assert.Equal(t, expected.Uid, actual.Uid) assert.Equal(t, expected.Gid, actual.Gid) assert.Equal(t, expected.Uname, actual.Uname) assert.Equal(t, expected.Gname, actual.Gname) assert.True(t, actual.ModTime.Equal(expected.ModTime)) assert.True(t, actual.AccessTime.Equal(expected.AccessTime)) assert.True(t, actual.ChangeTime.Equal(expected.ChangeTime)) 
assert.Equal(t, expected.Devmajor, actual.Devmajor) assert.Equal(t, expected.Devminor, actual.Devminor) assert.Equal(t, expected.Xattrs, actual.Xattrs) //nolint:staticcheck // We do want a comprehensive coverage in this test. // We can’t compare PAXRecords for complete equality, because tar.Writer adds atime and ctime entries. So ensure all expected records are present. for k, v := range expected.PAXRecords { v2, ok := actual.PAXRecords[k] assert.True(t, ok, k) assert.Equal(t, v, v2) } assert.Equal(t, expected.Format, actual.Format) } } vbatts-tar-split-6881021/tar/asm/testdata/000077500000000000000000000000001467537433400203245ustar00rootroot000000000000001c51fc286aa95d9413226599576bafa38490b1e292375c90de095855b64caea6000066400000000000000000000001651467537433400305730ustar00rootroot00000000000000vbatts-tar-split-6881021/tar/asm/testdata+)*Me10SSS0 贁 7334bP0`(-.I,RP`Ǎgbc*|,z,`:@4+,2]!Aa 8tvbatts-tar-split-6881021/tar/asm/testdata/extranils.tar.gz000066400000000000000000000001771467537433400234710ustar00rootroot00000000000000 gWextranils.tar= @ѷY¼1f!`DiuNsO9~ލ|[d뽵qrCR.6-0m7/k|tNҘ)vbatts-tar-split-6881021/tar/asm/testdata/fatlonglink.tar.gz000066400000000000000000000634421467537433400237740ustar00rootroot00000000000000Utest.tarkNP.+si6Q%c"up2`H6!N~O ]WcJ[UL͹ئ*ĘTե:պ_WwLC~5_LOʡ7͗_PZ';w]v1"?<wm*R )sM?`|;S?j9|MJ C0}oCknw;_1S\1N@x1,1/׮w|1<=I߾۶y:p&b=/!1Vպ~\vݾg_-~5_ݦ4 qQz6i%" k:> fAp>$(q>~)5Y^ysk_~.a"J%MY2)c)JM]X~]my~n *@2dPvbatts-tar-split-6881021/tar/asm/testdata/iso-8859.tar.gz000066400000000000000000000002731467537433400226620ustar00rootroot00000000000000Viso-8859.tarA 0@Ѭ=E.&=HZ 1EFԍ mf 4,KFyvAH&E(Kh}>Rд6u솮!9Gm|ϧA|0V8&E7;]X$r(vbatts-tar-split-6881021/tar/asm/testdata/longlink.tar.gz000066400000000000000000000006661467537433400233000ustar00rootroot00000000000000=Ulonglink.tarkj@@,$3yDZIKߤUX7fn$ԞāUӮWUM{u!0ֆY_[5Ţxl=wߴu!w>$k?up?@_] s.mc_X9@?h7Wp}C LUOySK@_!}d 3yP[*i!OֳW1!}ژa 0 will get // read into a resulting output stream (due to hardlinks). 
FileType Type = 1 + iota // SegmentType represents a raw bytes segment from the archive stream. These raw // byte segments consist of the raw headers and various padding. // // Its payload is to be marshalled base64 encoded. SegmentType ) // Entry is the structure for packing and unpacking the information read from // the Tar archive. // // FileType Payload checksum is using `hash/crc64` for basic file integrity, // _not_ for cryptography. // From http://www.backplane.com/matt/crc64.html, CRC32 has almost 40,000 // collisions in a sample of 18.2 million, CRC64 had none. type Entry struct { Type Type `json:"type"` Name string `json:"name,omitempty"` NameRaw []byte `json:"name_raw,omitempty"` Size int64 `json:"size,omitempty"` Payload []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here; Position int `json:"position"` } // SetName will check name for valid UTF-8 string, and set the appropriate // field. See https://github.com/vbatts/tar-split/issues/17 func (e *Entry) SetName(name string) { if utf8.ValidString(name) { e.Name = name } else { e.NameRaw = []byte(name) } } // SetNameBytes will check name for valid UTF-8 string, and set the appropriate // field func (e *Entry) SetNameBytes(name []byte) { if utf8.Valid(name) { e.Name = string(name) } else { e.NameRaw = name } } // GetName returns the string for the entry's name, regardless of the field stored in func (e *Entry) GetName() string { if len(e.NameRaw) > 0 { return string(e.NameRaw) } return e.Name } // GetNameBytes returns the bytes for the entry's name, regardless of the field stored in func (e *Entry) GetNameBytes() []byte { if len(e.NameRaw) > 0 { return e.NameRaw } return []byte(e.Name) } vbatts-tar-split-6881021/tar/storage/entry_test.go000066400000000000000000000034041467537433400221270ustar00rootroot00000000000000package storage import ( "encoding/json" "sort" "testing" ) func TestEntries(t *testing.T) { e := Entries{ Entry{ Type: SegmentType, Payload: 
[]byte("y'all"), Position: 1, }, Entry{ Type: SegmentType, Payload: []byte("doin"), Position: 3, }, Entry{ Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), Position: 2, }, Entry{ Type: SegmentType, Payload: []byte("how"), Position: 0, }, } sort.Sort(e) if e[0].Position != 0 { t.Errorf("expected Position 0, but got %d", e[0].Position) } } func TestFile(t *testing.T) { f := Entry{ Type: FileType, Size: 100, Position: 2, } f.SetName("./hello.txt") buf, err := json.Marshal(f) if err != nil { t.Fatal(err) } f1 := Entry{} if err = json.Unmarshal(buf, &f1); err != nil { t.Fatal(err) } if f.GetName() != f1.GetName() { t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) } if f.Size != f1.Size { t.Errorf("expected Size %q, got %q", f.Size, f1.Size) } if f.Position != f1.Position { t.Errorf("expected Position %q, got %q", f.Position, f1.Position) } } func TestFileRaw(t *testing.T) { f := Entry{ Type: FileType, Size: 100, Position: 2, } f.SetNameBytes([]byte{0x2E, 0x2F, 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0xE4, 0x2E, 0x74, 0x78, 0x74}) buf, err := json.Marshal(f) if err != nil { t.Fatal(err) } f1 := Entry{} if err = json.Unmarshal(buf, &f1); err != nil { t.Fatal(err) } if f.GetName() != f1.GetName() { t.Errorf("expected Name %q, got %q", f.GetName(), f1.GetName()) } if f.Size != f1.Size { t.Errorf("expected Size %q, got %q", f.Size, f1.Size) } if f.Position != f1.Position { t.Errorf("expected Position %q, got %q", f.Position, f1.Position) } } vbatts-tar-split-6881021/tar/storage/getter.go000066400000000000000000000052771467537433400212330ustar00rootroot00000000000000package storage import ( "bytes" "errors" "hash/crc64" "io" "os" "path/filepath" ) // FileGetter is the interface for getting a stream of a file payload, // addressed by name/filename. Presumably, the names will be scoped to relative // file paths. 
type FileGetter interface { // Get returns a stream for the provided file path Get(filename string) (output io.ReadCloser, err error) } // FilePutter is the interface for storing a stream of a file payload, // addressed by name/filename. type FilePutter interface { // Put returns the size of the stream received, and the crc64 checksum for // the provided stream Put(filename string, input io.Reader) (size int64, checksum []byte, err error) } // FileGetPutter is the interface that groups both Getting and Putting file // payloads. type FileGetPutter interface { FileGetter FilePutter } // NewPathFileGetter returns a FileGetter that is for files relative to path // relpath. func NewPathFileGetter(relpath string) FileGetter { return &pathFileGetter{root: relpath} } type pathFileGetter struct { root string } func (pfg pathFileGetter) Get(filename string) (io.ReadCloser, error) { return os.Open(filepath.Join(pfg.root, filename)) } type bufferFileGetPutter struct { files map[string][]byte } func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) { if _, ok := bfgp.files[name]; !ok { return nil, errors.New("no such file") } b := bytes.NewBuffer(bfgp.files[name]) return &readCloserWrapper{b}, nil } func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) { crc := crc64.New(CRCTable) buf := bytes.NewBuffer(nil) cw := io.MultiWriter(crc, buf) i, err := io.Copy(cw, r) if err != nil { return 0, nil, err } bfgp.files[name] = buf.Bytes() return i, crc.Sum(nil), nil } type readCloserWrapper struct { io.Reader } func (w *readCloserWrapper) Close() error { return nil } // NewBufferFileGetPutter is a simple in-memory FileGetPutter // // Implication is this is memory intensive... // Probably best for testing or light weight cases. 
func NewBufferFileGetPutter() FileGetPutter { return &bufferFileGetPutter{ files: map[string][]byte{}, } } // NewDiscardFilePutter is a bit bucket FilePutter func NewDiscardFilePutter() FilePutter { return &bitBucketFilePutter{} } type bitBucketFilePutter struct { buffer [32 * 1024]byte // 32 kB is the buffer size currently used by io.Copy, as of August 2021. } func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) { c := crc64.New(CRCTable) i, err := io.CopyBuffer(c, r, bbfp.buffer[:]) return i, c.Sum(nil), err } // CRCTable is the default table used for crc64 sum calculations var CRCTable = crc64.MakeTable(crc64.ISO) vbatts-tar-split-6881021/tar/storage/getter_test.go000066400000000000000000000036621467537433400222660ustar00rootroot00000000000000package storage import ( "bytes" "fmt" "io" "strings" "testing" ) func TestGetter(t *testing.T) { fgp := NewBufferFileGetPutter() files := map[string]map[string][]byte{ "file1.txt": {"foo": []byte{60, 60, 48, 48, 0, 0, 0, 0}}, "file2.txt": {"bar": []byte{45, 196, 22, 240, 0, 0, 0, 0}}, } for n, b := range files { for body, sum := range b { _, csum, err := fgp.Put(n, bytes.NewBufferString(body)) if err != nil { t.Error(err) } if !bytes.Equal(csum, sum) { t.Errorf("checksum: expected 0x%x; got 0x%x", sum, csum) } } } for n, b := range files { for body := range b { r, err := fgp.Get(n) if err != nil { t.Error(err) } buf, err := io.ReadAll(r) if err != nil { t.Error(err) } if body != string(buf) { t.Errorf("expected %q, got %q", body, string(buf)) } } } } func TestPutter(t *testing.T) { fp := NewDiscardFilePutter() // map[filename]map[body]crc64sum files := map[string]map[string][]byte{ "file1.txt": {"foo": []byte{60, 60, 48, 48, 0, 0, 0, 0}}, "file2.txt": {"bar": []byte{45, 196, 22, 240, 0, 0, 0, 0}}, "file3.txt": {"baz": []byte{32, 68, 22, 240, 0, 0, 0, 0}}, "file4.txt": {"bif": []byte{48, 9, 150, 240, 0, 0, 0, 0}}, } for n, b := range files { for body, sum := range b { _, csum, err := 
fp.Put(n, bytes.NewBufferString(body)) if err != nil { t.Error(err) } if !bytes.Equal(csum, sum) { t.Errorf("checksum on %q: expected %v; got %v", n, sum, csum) } } } } func BenchmarkPutter(b *testing.B) { files := []string{ strings.Repeat("foo", 1000), strings.Repeat("bar", 1000), strings.Repeat("baz", 1000), strings.Repeat("fooz", 1000), strings.Repeat("vbatts", 1000), strings.Repeat("systemd", 1000), } for i := 0; i < b.N; i++ { fgp := NewBufferFileGetPutter() for n, body := range files { if _, _, err := fgp.Put(fmt.Sprintf("%d", n), bytes.NewBufferString(body)); err != nil { b.Fatal(err) } } } } vbatts-tar-split-6881021/tar/storage/packer.go000066400000000000000000000044171467537433400212010ustar00rootroot00000000000000package storage import ( "encoding/json" "errors" "io" "path/filepath" "unicode/utf8" ) // ErrDuplicatePath occurs when a tar archive has more than one entry for the // same file path var ErrDuplicatePath = errors.New("duplicates of file paths not supported") // Packer describes the methods to pack Entries to a storage destination type Packer interface { // AddEntry packs the Entry and returns its position AddEntry(e Entry) (int, error) } // Unpacker describes the methods to read Entries from a source type Unpacker interface { // Next returns the next Entry being unpacked, or error, until io.EOF Next() (*Entry, error) } type jsonUnpacker struct { seen seenNames dec *json.Decoder } func (jup *jsonUnpacker) Next() (*Entry, error) { var e Entry err := jup.dec.Decode(&e) if err != nil { return nil, err } // check for dup name if e.Type == FileType { cName := filepath.Clean(e.GetName()) if _, ok := jup.seen[cName]; ok { return nil, ErrDuplicatePath } jup.seen[cName] = struct{}{} } return &e, err } // NewJSONUnpacker provides an Unpacker that reads Entries (SegmentType and // FileType) as a json document. // // Each Entry read are expected to be delimited by new line. 
func NewJSONUnpacker(r io.Reader) Unpacker { return &jsonUnpacker{ dec: json.NewDecoder(r), seen: seenNames{}, } } type jsonPacker struct { w io.Writer e *json.Encoder pos int seen seenNames } type seenNames map[string]struct{} func (jp *jsonPacker) AddEntry(e Entry) (int, error) { // if Name is not valid utf8, switch it to raw first. if e.Name != "" { if !utf8.ValidString(e.Name) { e.NameRaw = []byte(e.Name) e.Name = "" } } // check early for dup name if e.Type == FileType { cName := filepath.Clean(e.GetName()) if _, ok := jp.seen[cName]; ok { return -1, ErrDuplicatePath } jp.seen[cName] = struct{}{} } e.Position = jp.pos err := jp.e.Encode(e) if err != nil { return -1, err } // made it this far, increment now jp.pos++ return e.Position, nil } // NewJSONPacker provides a Packer that writes each Entry (SegmentType and // FileType) as a json document. // // The Entries are delimited by new line. func NewJSONPacker(w io.Writer) Packer { return &jsonPacker{ w: w, e: json.NewEncoder(w), seen: seenNames{}, } } vbatts-tar-split-6881021/tar/storage/packer_test.go000066400000000000000000000075771467537433400222520ustar00rootroot00000000000000package storage import ( "bytes" "compress/gzip" "io" "os" "testing" ) func TestDuplicateFail(t *testing.T) { e := []Entry{ { Type: FileType, Name: "./hurr.txt", Payload: []byte("abcde"), }, { Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, { Type: FileType, Name: "hurr.txt", // slightly different path, same file though Payload: []byte("deadbeef"), }, } buf := []byte{} b := bytes.NewBuffer(buf) jp := NewJSONPacker(b) if _, err := jp.AddEntry(e[0]); err != nil { t.Error(err) } if _, err := jp.AddEntry(e[1]); err != ErrDuplicatePath { t.Errorf("expected failure on duplicate path") } if _, err := jp.AddEntry(e[2]); err != ErrDuplicatePath { t.Errorf("expected failure on duplicate path") } } func TestJSONPackerUnpacker(t *testing.T) { e := []Entry{ { Type: SegmentType, Payload: []byte("how"), }, { Type: SegmentType, 
Payload: []byte("y'all"), }, { Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, { Type: SegmentType, Payload: []byte("doin"), }, } buf := []byte{} b := bytes.NewBuffer(buf) func() { jp := NewJSONPacker(b) for i := range e { if _, err := jp.AddEntry(e[i]); err != nil { t.Error(err) } } }() // >> packer_test.go:43: uncompressed: 266 //t.Errorf("uncompressed: %d", len(b.Bytes())) b = bytes.NewBuffer(b.Bytes()) entries := Entries{} func() { jup := NewJSONUnpacker(b) for { entry, err := jup.Next() if err != nil { if err == io.EOF { break } t.Error(err) } entries = append(entries, *entry) t.Logf("got %#v", entry) } }() if len(entries) != len(e) { t.Errorf("expected %d entries, got %d", len(e), len(entries)) } } // you can use a compress Reader/Writer and make nice savings. // // For these two tests that are using the same set, it the difference of 266 // bytes uncompressed vs 138 bytes compressed. func TestGzip(t *testing.T) { e := []Entry{ { Type: SegmentType, Payload: []byte("how"), }, { Type: SegmentType, Payload: []byte("y'all"), }, { Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, { Type: SegmentType, Payload: []byte("doin"), }, } buf := []byte{} b := bytes.NewBuffer(buf) gzW := gzip.NewWriter(b) jp := NewJSONPacker(gzW) for i := range e { if _, err := jp.AddEntry(e[i]); err != nil { t.Error(err) } } gzW.Close() // >> packer_test.go:99: compressed: 138 //t.Errorf("compressed: %d", len(b.Bytes())) b = bytes.NewBuffer(b.Bytes()) gzR, err := gzip.NewReader(b) if err != nil { t.Fatal(err) } entries := Entries{} func() { jup := NewJSONUnpacker(gzR) for { entry, err := jup.Next() if err != nil { if err == io.EOF { break } t.Error(err) } entries = append(entries, *entry) t.Logf("got %#v", entry) } }() if len(entries) != len(e) { t.Errorf("expected %d entries, got %d", len(e), len(entries)) } } func BenchmarkGetPut(b *testing.B) { e := []Entry{ { Type: SegmentType, Payload: []byte("how"), }, { Type: SegmentType, Payload: 
[]byte("y'all"), }, { Type: FileType, Name: "./hurr.txt", Payload: []byte("deadbeef"), }, { Type: SegmentType, Payload: []byte("doin"), }, } b.RunParallel(func(pb *testing.PB) { for pb.Next() { func() { fh, err := os.CreateTemp("", "tar-split.") if err != nil { b.Fatal(err) } defer os.Remove(fh.Name()) defer fh.Close() jp := NewJSONPacker(fh) for i := range e { if _, err := jp.AddEntry(e[i]); err != nil { b.Fatal(err) } } if err := fh.Sync(); err != nil { b.Fatal(err) } up := NewJSONUnpacker(fh) for { _, err := up.Next() if err != nil { if err == io.EOF { break } b.Fatal(err) } } }() } }) }