docker-runc-tags-docker-1.13.1/000077500000000000000000000000001304443252500162145ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/.gitignore000066400000000000000000000001271304443252500202040ustar00rootroot00000000000000vendor/pkg /runc Godeps/_workspace/src/github.com/opencontainers/runc man/man8 release docker-runc-tags-docker-1.13.1/.pullapprove.yml000066400000000000000000000002741304443252500213710ustar00rootroot00000000000000approve_by_comment: true approve_regex: ^LGTM reject_regex: ^Rejected reset_on_push: true author_approval: ignored reviewers: teams: - runc-maintainers name: default required: 2 docker-runc-tags-docker-1.13.1/CONTRIBUTING.md000066400000000000000000000114441304443252500204510ustar00rootroot00000000000000## Contribution Guidelines ### Pull requests are always welcome We are always thrilled to receive pull requests, and do our best to process them as fast as possible. Not sure if that typo is worth a pull request? Do it! We will appreciate it. If your pull request is not accepted on the first try, don't be discouraged! If there's a problem with the implementation, hopefully you received feedback on what to improve. We're trying very hard to keep runc lean and focused. We don't want it to do everything for everybody. This means that we might decide against incorporating a new feature. However, there might be a way to implement that feature *on top of* runc. ### Conventions Fork the repo and make changes on your fork in a feature branch: - If it's a bugfix branch, name it XXX-something where XXX is the number of the issue - If it's a feature branch, create an enhancement issue to announce your intentions, and name it XXX-something where XXX is the number of the issue. Submit unit tests for your changes. Go has a great test framework built in; use it! Take a look at existing tests for inspiration. Run the full test suite on your branch before submitting a pull request. Update the documentation when creating or modifying features. 
Test your documentation changes for clarity, concision, and correctness, as well as a clean documentation build. See ``docs/README.md`` for more information on building the docs and how docs get released. Write clean code. Universally formatted code promotes ease of writing, reading, and maintenance. Always run `gofmt -s -w file.go` on each changed file before committing your changes. Most editors have plugins that do this automatically. Pull requests descriptions should be as clear as possible and include a reference to all the issues that they address. Pull requests must not contain commits from other users or branches. Commit messages must start with a capitalized and short summary (max. 50 chars) written in the imperative, followed by an optional, more detailed explanatory text which is separated from the summary by an empty line. Code review comments may be added to your pull request. Discuss, then make the suggested modifications and push additional commits to your feature branch. Be sure to post a comment after pushing. The new commits will show up in the pull request automatically, but the reviewers will not be notified unless you comment. Before the pull request is merged, make sure that you squash your commits into logical units of work using `git rebase -i` and `git push -f`. After every commit the test suite should be passing. Include documentation changes in the same commit so that a revert would remove all traces of the feature or fix. Commits that fix or close an issue should include a reference like `Closes #XXX` or `Fixes #XXX`, which will automatically close the issue when merged. ### Sign your work The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open-source patch. 
The rules are pretty simple: if you can certify the below (from [developercertificate.org](http://developercertificate.org/)): ``` Developer Certificate of Origin Version 1.1 Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 660 York Street, Suite 102, San Francisco, CA 94110 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Developer's Certificate of Origin 1.1 By making a contribution to this project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. ``` then you just add a line to every git commit message: Signed-off-by: Joe Smith using your real name (sorry, no pseudonyms or anonymous contributions.) You can add the sign off when creating the git commit via `git commit -s`. 
docker-runc-tags-docker-1.13.1/Dockerfile000066400000000000000000000026301304443252500202070ustar00rootroot00000000000000FROM golang:1.7.1 # libseccomp in jessie is not _quite_ new enough -- need backports version RUN echo 'deb http://httpredir.debian.org/debian jessie-backports main' > /etc/apt/sources.list.d/backports.list RUN apt-get update && apt-get install -y \ build-essential \ curl \ gawk \ iptables \ jq \ pkg-config \ libaio-dev \ libcap-dev \ libprotobuf-dev \ libprotobuf-c0-dev \ libseccomp2/jessie-backports \ libseccomp-dev/jessie-backports \ protobuf-c-compiler \ protobuf-compiler \ python-minimal \ --no-install-recommends # install bats RUN cd /tmp \ && git clone https://github.com/sstephenson/bats.git \ && cd bats \ && git reset --hard 03608115df2071fff4eaaff1605768c275e5f81f \ && ./install.sh /usr/local \ && rm -rf /tmp/bats # install criu ENV CRIU_VERSION 1.7 RUN mkdir -p /usr/src/criu \ && curl -sSL https://github.com/xemul/criu/archive/v${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \ && cd /usr/src/criu \ && make install-criu # setup a playground for us to spawn containers in ENV ROOTFS /busybox RUN mkdir -p ${ROOTFS} \ && curl -o- -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.11/rootfs.tar' | tar -C ${ROOTFS} -xf - COPY script/tmpmount / WORKDIR /go/src/github.com/opencontainers/runc ENTRYPOINT ["/tmpmount"] ADD . 
/go/src/github.com/opencontainers/runc docker-runc-tags-docker-1.13.1/Godeps/000077500000000000000000000000001304443252500174355ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/Godeps.json000066400000000000000000000043621304443252500215560ustar00rootroot00000000000000{ "ImportPath": "github.com/opencontainers/runc", "GoVersion": "go1.5.3", "GodepVersion": "v74", "Deps": [ { "ImportPath": "github.com/Sirupsen/logrus", "Comment": "v0.7.3-2-g26709e2", "Rev": "26709e2714106fb8ad40b773b711ebce25b78914" }, { "ImportPath": "github.com/urfave/cli", "Comment": "v1.18.0-67-gd53eb99", "Rev": "d53eb991652b1d438abdd34ce4bfa3ef1539108e" }, { "ImportPath": "github.com/coreos/go-systemd/activation", "Comment": "v4", "Rev": "b4a58d95188dd092ae20072bac14cece0e67c388" }, { "ImportPath": "github.com/coreos/go-systemd/dbus", "Comment": "v4", "Rev": "b4a58d95188dd092ae20072bac14cece0e67c388" }, { "ImportPath": "github.com/coreos/go-systemd/util", "Comment": "v4", "Rev": "b4a58d95188dd092ae20072bac14cece0e67c388" }, { "ImportPath": "github.com/docker/docker/pkg/mount", "Comment": "v1.4.1-4831-g0f5c9d3", "Rev": "0f5c9d301b9b1cca66b3ea0f9dec3b5317d3686d" }, { "ImportPath": "github.com/docker/docker/pkg/symlink", "Comment": "v1.4.1-4831-g0f5c9d3", "Rev": "0f5c9d301b9b1cca66b3ea0f9dec3b5317d3686d" }, { "ImportPath": "github.com/docker/docker/pkg/term", "Comment": "v1.4.1-4831-g0f5c9d3", "Rev": "0f5c9d301b9b1cca66b3ea0f9dec3b5317d3686d" }, { "ImportPath": "github.com/docker/go-units", "Comment": "v0.1.0", "Rev": "9b001659dd36225e356b4467c465d732e745f53d" }, { "ImportPath": "github.com/godbus/dbus", "Comment": "v3", "Rev": "c7fdd8b5cd55e87b4e1f4e372cdb1db61dd6c66f" }, { "ImportPath": "github.com/golang/protobuf/proto", "Rev": "f7137ae6b19afbfd61a94b746fda3b3fe0491874" }, { "ImportPath": "github.com/opencontainers/runtime-spec/specs-go", "Comment": "v1.0.0-rc2-38-g1c7c27d", "Rev": "1c7c27d043c2a5e513a44084d2b10d77d1402b8c" }, { "ImportPath": 
"github.com/seccomp/libseccomp-golang", "Rev": "32f571b70023028bd57d9288c20efbcb237f3ce0" }, { "ImportPath": "github.com/syndtr/gocapability/capability", "Rev": "e7cb7fa329f456b3855136a2642b197bad7366ba" }, { "ImportPath": "github.com/vishvananda/netlink", "Rev": "1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270" }, { "ImportPath": "github.com/mrunalp/fileutils", "Rev": "ed869b029674c0e9ce4c0dfa781405c2d9946d08" } ] } docker-runc-tags-docker-1.13.1/Godeps/Readme000066400000000000000000000002101304443252500205460ustar00rootroot00000000000000This directory tree is generated automatically by godep. Please do not edit. See https://github.com/tools/godep for more information. docker-runc-tags-docker-1.13.1/Godeps/_workspace/000077500000000000000000000000001304443252500215725ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/.gitignore000066400000000000000000000000121304443252500235530ustar00rootroot00000000000000/pkg /bin docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/000077500000000000000000000000001304443252500223615ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/000077500000000000000000000000001304443252500244205ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/000077500000000000000000000000001304443252500256675ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/000077500000000000000000000000001304443252500271365ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/LICENSE000066400000000000000000000250151304443252500301460ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 https://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2013-2015 Docker, Inc. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/NOTICE000066400000000000000000000011761304443252500300470ustar00rootroot00000000000000Docker Copyright 2012-2015 Docker, Inc. This product includes software developed at Docker, Inc. (https://www.docker.com). This product contains software (https://github.com/kr/pty) developed by Keith Rarick, licensed under the MIT License. The following is courtesy of our legal counsel: Use and transfer of Docker may be subject to certain restrictions by the United States and other governments. It is your responsibility to ensure that your use and/or transfer does not violate applicable laws. For more information, please see https://www.bis.doc.gov See also https://www.apache.org/dev/crypto.html and/or seek legal counsel. docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/000077500000000000000000000000001304443252500277175ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mflag/000077500000000000000000000000001304443252500310055ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mflag/LICENSE000066400000000000000000000027251304443252500320200ustar00rootroot00000000000000Copyright (c) 2014-2015 The Docker & Go Authors. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount/000077500000000000000000000000001304443252500310615ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount/flags.go000066400000000000000000000035571304443252500325160ustar00rootroot00000000000000package mount import ( "strings" ) // Parse fstab type mount options into mount() flags // and device specific data func parseOptions(options string) (int, string) { var ( flag int data []string ) flags := map[string]struct { clear bool flag int }{ "defaults": {false, 0}, "ro": {false, RDONLY}, "rw": {true, RDONLY}, "suid": {true, NOSUID}, "nosuid": {false, NOSUID}, "dev": {true, NODEV}, "nodev": {false, NODEV}, "exec": {true, NOEXEC}, "noexec": {false, NOEXEC}, "sync": {false, SYNCHRONOUS}, "async": {true, SYNCHRONOUS}, "dirsync": {false, DIRSYNC}, "remount": {false, REMOUNT}, "mand": {false, MANDLOCK}, "nomand": {true, MANDLOCK}, "atime": {true, NOATIME}, "noatime": {false, NOATIME}, "diratime": {true, NODIRATIME}, "nodiratime": {false, NODIRATIME}, "bind": {false, BIND}, "rbind": {false, RBIND}, "unbindable": {false, UNBINDABLE}, "runbindable": {false, RUNBINDABLE}, "private": {false, PRIVATE}, "rprivate": {false, RPRIVATE}, "shared": {false, SHARED}, "rshared": {false, RSHARED}, "slave": {false, SLAVE}, "rslave": {false, RSLAVE}, "relatime": {false, RELATIME}, "norelatime": {true, RELATIME}, "strictatime": {false, STRICTATIME}, "nostrictatime": {true, STRICTATIME}, } for _, o := range strings.Split(options, ",") { // If the option does not exist in the flags table or the flag // is not supported on the platform, // then it is a data value for a specific fs type if f, exists := flags[o]; exists && f.flag != 0 { if f.clear { flag &= ^f.flag } else { flag |= f.flag } } else { data = append(data, o) } } return flag, strings.Join(data, ",") } 
flags_freebsd.go000066400000000000000000000016661304443252500341300ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount// +build freebsd,cgo package mount /* #include */ import "C" const ( // RDONLY will mount the filesystem as read-only. RDONLY = C.MNT_RDONLY // NOSUID will not allow set-user-identifier or set-group-identifier bits to // take effect. NOSUID = C.MNT_NOSUID // NOEXEC will not allow execution of any binaries on the mounted file system. NOEXEC = C.MNT_NOEXEC // SYNCHRONOUS will allow any I/O to the file system to be done synchronously. SYNCHRONOUS = C.MNT_SYNCHRONOUS // NOATIME will not update the file access time when reading from a file. NOATIME = C.MNT_NOATIME ) // These flags are unsupported. const ( BIND = 0 DIRSYNC = 0 MANDLOCK = 0 NODEV = 0 NODIRATIME = 0 UNBINDABLE = 0 RUNBINDABLE = 0 PRIVATE = 0 RPRIVATE = 0 SHARED = 0 RSHARED = 0 SLAVE = 0 RSLAVE = 0 RBIND = 0 RELATIVE = 0 RELATIME = 0 REMOUNT = 0 STRICTATIME = 0 ) flags_linux.go000066400000000000000000000054571304443252500336570ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mountpackage mount import ( "syscall" ) const ( // RDONLY will mount the file system read-only. RDONLY = syscall.MS_RDONLY // NOSUID will not allow set-user-identifier or set-group-identifier bits to // take effect. NOSUID = syscall.MS_NOSUID // NODEV will not interpret character or block special devices on the file // system. NODEV = syscall.MS_NODEV // NOEXEC will not allow execution of any binaries on the mounted file system. NOEXEC = syscall.MS_NOEXEC // SYNCHRONOUS will allow I/O to the file system to be done synchronously. SYNCHRONOUS = syscall.MS_SYNCHRONOUS // DIRSYNC will force all directory updates within the file system to be done // synchronously. This affects the following system calls: creat, link, // unlink, symlink, mkdir, rmdir, mknod and rename. 
DIRSYNC = syscall.MS_DIRSYNC // REMOUNT will attempt to remount an already-mounted file system. This is // commonly used to change the mount flags for a file system, especially to // make a readonly file system writeable. It does not change device or mount // point. REMOUNT = syscall.MS_REMOUNT // MANDLOCK will force mandatory locks on a filesystem. MANDLOCK = syscall.MS_MANDLOCK // NOATIME will not update the file access time when reading from a file. NOATIME = syscall.MS_NOATIME // NODIRATIME will not update the directory access time. NODIRATIME = syscall.MS_NODIRATIME // BIND remounts a subtree somewhere else. BIND = syscall.MS_BIND // RBIND remounts a subtree and all possible submounts somewhere else. RBIND = syscall.MS_BIND | syscall.MS_REC // UNBINDABLE creates a mount which cannot be cloned through a bind operation. UNBINDABLE = syscall.MS_UNBINDABLE // RUNBINDABLE marks the entire mount tree as UNBINDABLE. RUNBINDABLE = syscall.MS_UNBINDABLE | syscall.MS_REC // PRIVATE creates a mount which carries no propagation abilities. PRIVATE = syscall.MS_PRIVATE // RPRIVATE marks the entire mount tree as PRIVATE. RPRIVATE = syscall.MS_PRIVATE | syscall.MS_REC // SLAVE creates a mount which receives propagation from its master, but not // vice versa. SLAVE = syscall.MS_SLAVE // RSLAVE marks the entire mount tree as SLAVE. RSLAVE = syscall.MS_SLAVE | syscall.MS_REC // SHARED creates a mount which provides the ability to create mirrors of // that mount such that mounts and unmounts within any of the mirrors // propagate to the other mirrors. SHARED = syscall.MS_SHARED // RSHARED marks the entire mount tree as SHARED. RSHARED = syscall.MS_SHARED | syscall.MS_REC // RELATIME updates inode access times relative to modify or change time. RELATIME = syscall.MS_RELATIME // STRICTATIME allows to explicitly request full atime updates. This makes // it possible for the kernel to default to relatime or noatime but still // allow userspace to override it. 
STRICTATIME = syscall.MS_STRICTATIME ) flags_unsupported.go000066400000000000000000000007501304443252500350770ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount// +build !linux,!freebsd freebsd,!cgo package mount // These flags are unsupported. const ( BIND = 0 DIRSYNC = 0 MANDLOCK = 0 NOATIME = 0 NODEV = 0 NODIRATIME = 0 NOEXEC = 0 NOSUID = 0 UNBINDABLE = 0 RUNBINDABLE = 0 PRIVATE = 0 RPRIVATE = 0 SHARED = 0 RSHARED = 0 SLAVE = 0 RSLAVE = 0 RBIND = 0 RELATIME = 0 RELATIVE = 0 REMOUNT = 0 STRICTATIME = 0 SYNCHRONOUS = 0 RDONLY = 0 ) docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount/mount.go000066400000000000000000000040741304443252500325570ustar00rootroot00000000000000package mount import ( "time" ) // GetMounts retrieves a list of mounts for the current running process. func GetMounts() ([]*Info, error) { return parseMountTable() } // Mounted looks at /proc/self/mountinfo to determine of the specified // mountpoint has been mounted func Mounted(mountpoint string) (bool, error) { entries, err := parseMountTable() if err != nil { return false, err } // Search the table for the mountpoint for _, e := range entries { if e.Mountpoint == mountpoint { return true, nil } } return false, nil } // Mount will mount filesystem according to the specified configuration, on the // condition that the target path is *not* already mounted. Options must be // specified like the mount or fstab unix commands: "opt1=val1,opt2=val2". See // flags.go for supported option flags. func Mount(device, target, mType, options string) error { flag, _ := parseOptions(options) if flag&REMOUNT != REMOUNT { if mounted, err := Mounted(target); err != nil || mounted { return err } } return ForceMount(device, target, mType, options) } // ForceMount will mount a filesystem according to the specified configuration, // *regardless* if the target path is not already mounted. 
Options must be // specified like the mount or fstab unix commands: "opt1=val1,opt2=val2". See // flags.go for supported option flags. func ForceMount(device, target, mType, options string) error { flag, data := parseOptions(options) if err := mount(device, target, mType, uintptr(flag), data); err != nil { return err } return nil } // Unmount will unmount the target filesystem, so long as it is mounted. func Unmount(target string) error { if mounted, err := Mounted(target); err != nil || !mounted { return err } return ForceUnmount(target) } // ForceUnmount will force an unmount of the target filesystem, regardless if // it is mounted or not. func ForceUnmount(target string) (err error) { // Simple retry logic for unmount for i := 0; i < 10; i++ { if err = unmount(target, 0); err == nil { return nil } time.Sleep(100 * time.Millisecond) } return } mounter_freebsd.go000066400000000000000000000023711304443252500345170ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mountpackage mount /* #include #include #include #include #include #include */ import "C" import ( "fmt" "strings" "syscall" "unsafe" ) func allocateIOVecs(options []string) []C.struct_iovec { out := make([]C.struct_iovec, len(options)) for i, option := range options { out[i].iov_base = unsafe.Pointer(C.CString(option)) out[i].iov_len = C.size_t(len(option) + 1) } return out } func mount(device, target, mType string, flag uintptr, data string) error { isNullFS := false xs := strings.Split(data, ",") for _, x := range xs { if x == "bind" { isNullFS = true } } options := []string{"fspath", target} if isNullFS { options = append(options, "fstype", "nullfs", "target", device) } else { options = append(options, "fstype", mType, "from", device) } rawOptions := allocateIOVecs(options) for _, rawOption := range rawOptions { defer C.free(rawOption.iov_base) } if errno := C.nmount(&rawOptions[0], C.uint(len(options)), C.int(flag)); errno != 0 { reason := 
C.GoString(C.strerror(*C.__error())) return fmt.Errorf("Failed to call nmount: %s", reason) } return nil } func unmount(target string, flag int) error { return syscall.Unmount(target, flag) } mounter_linux.go000066400000000000000000000010221304443252500342340ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mountpackage mount import ( "syscall" ) func mount(device, target, mType string, flag uintptr, data string) error { if err := syscall.Mount(device, target, mType, flag, data); err != nil { return err } // If we have a bind mount or remount, remount... if flag&syscall.MS_BIND == syscall.MS_BIND && flag&syscall.MS_RDONLY == syscall.MS_RDONLY { return syscall.Mount(device, target, mType, flag|syscall.MS_REMOUNT, data) } return nil } func unmount(target string, flag int) error { return syscall.Unmount(target, flag) } mounter_unsupported.go000066400000000000000000000003521304443252500354720ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount// +build !linux,!freebsd freebsd,!cgo package mount func mount(device, target, mType string, flag uintptr, data string) error { panic("Not implemented") } func unmount(target string, flag int) error { panic("Not implemented") } docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount/mountinfo.go000066400000000000000000000020511304443252500334240ustar00rootroot00000000000000package mount // Info reveals information about a particular mounted filesystem. This // struct is populated from the content in the /proc//mountinfo file. type Info struct { // ID is a unique identifier of the mount (may be reused after umount). ID int // Parent indicates the ID of the mount parent (or of self for the top of the // mount tree). Parent int // Major indicates one half of the device ID which identifies the device class. 
Major int // Minor indicates one half of the device ID which identifies a specific // instance of device. Minor int // Root of the mount within the filesystem. Root string // Mountpoint indicates the mount point relative to the process's root. Mountpoint string // Opts represents mount-specific options. Opts string // Optional represents optional fields. Optional string // Fstype indicates the type of filesystem, such as EXT3. Fstype string // Source indicates filesystem specific information or "none". Source string // VfsOpts represents per super block options. VfsOpts string } mountinfo_freebsd.go000066400000000000000000000016311304443252500350420ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mountpackage mount /* #include #include #include */ import "C" import ( "fmt" "reflect" "unsafe" ) // Parse /proc/self/mountinfo because comparing Dev and ino does not work from // bind mounts. func parseMountTable() ([]*Info, error) { var rawEntries *C.struct_statfs count := int(C.getmntinfo(&rawEntries, C.MNT_WAIT)) if count == 0 { return nil, fmt.Errorf("Failed to call getmntinfo") } var entries []C.struct_statfs header := (*reflect.SliceHeader)(unsafe.Pointer(&entries)) header.Cap = count header.Len = count header.Data = uintptr(unsafe.Pointer(rawEntries)) var out []*Info for _, entry := range entries { var mountinfo Info mountinfo.Mountpoint = C.GoString(&entry.f_mntonname[0]) mountinfo.Source = C.GoString(&entry.f_mntfromname[0]) mountinfo.Fstype = C.GoString(&entry.f_fstypename[0]) out = append(out, &mountinfo) } return out, nil } mountinfo_linux.go000066400000000000000000000051341304443252500345710ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount// +build linux package mount import ( "bufio" "fmt" "io" "os" "strings" ) const ( /* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue (1)(2)(3) (4) (5) (6) (7) (8) 
(9) (10) (11) (1) mount ID: unique identifier of the mount (may be reused after umount) (2) parent ID: ID of parent (or of self for the top of the mount tree) (3) major:minor: value of st_dev for files on filesystem (4) root: root of the mount within the filesystem (5) mount point: mount point relative to the process's root (6) mount options: per mount options (7) optional fields: zero or more fields of the form "tag[:value]" (8) separator: marks the end of the optional fields (9) filesystem type: name of filesystem of the form "type[.subtype]" (10) mount source: filesystem specific information or "none" (11) super options: per super block options*/ mountinfoFormat = "%d %d %d:%d %s %s %s %s" ) // Parse /proc/self/mountinfo because comparing Dev and ino does not work from // bind mounts func parseMountTable() ([]*Info, error) { f, err := os.Open("/proc/self/mountinfo") if err != nil { return nil, err } defer f.Close() return parseInfoFile(f) } func parseInfoFile(r io.Reader) ([]*Info, error) { var ( s = bufio.NewScanner(r) out = []*Info{} ) for s.Scan() { if err := s.Err(); err != nil { return nil, err } var ( p = &Info{} text = s.Text() optionalFields string ) if _, err := fmt.Sscanf(text, mountinfoFormat, &p.ID, &p.Parent, &p.Major, &p.Minor, &p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil { return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err) } // Safe as mountinfo encodes mountpoints with spaces as \040. index := strings.Index(text, " - ") postSeparatorFields := strings.Fields(text[index+3:]) if len(postSeparatorFields) < 3 { return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text) } if optionalFields != "-" { p.Optional = optionalFields } p.Fstype = postSeparatorFields[0] p.Source = postSeparatorFields[1] p.VfsOpts = strings.Join(postSeparatorFields[2:], " ") out = append(out, p) } return out, nil } // PidMountInfo collects the mounts for a specific process ID. 
If the process // ID is unknown, it is better to use `GetMounts` which will inspect // "/proc/self/mountinfo" instead. func PidMountInfo(pid int) ([]*Info, error) { f, err := os.Open(fmt.Sprintf("/proc/%d/mountinfo", pid)) if err != nil { return nil, err } defer f.Close() return parseInfoFile(f) } mountinfo_unsupported.go000066400000000000000000000003541304443252500360210ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount// +build !linux,!freebsd freebsd,!cgo package mount import ( "fmt" "runtime" ) func parseMountTable() ([]*Info, error) { return nil, fmt.Errorf("mount.parseMountTable is not implemented on %s/%s", runtime.GOOS, runtime.GOARCH) } sharedsubtree_linux.go000066400000000000000000000044331304443252500354140ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/mount// +build linux package mount // MakeShared ensures a mounted filesystem has the SHARED mount option enabled. // See the supported options in flags.go for further reference. func MakeShared(mountPoint string) error { return ensureMountedAs(mountPoint, "shared") } // MakeRShared ensures a mounted filesystem has the RSHARED mount option enabled. // See the supported options in flags.go for further reference. func MakeRShared(mountPoint string) error { return ensureMountedAs(mountPoint, "rshared") } // MakePrivate ensures a mounted filesystem has the PRIVATE mount option enabled. // See the supported options in flags.go for further reference. func MakePrivate(mountPoint string) error { return ensureMountedAs(mountPoint, "private") } // MakeRPrivate ensures a mounted filesystem has the RPRIVATE mount option // enabled. See the supported options in flags.go for further reference. func MakeRPrivate(mountPoint string) error { return ensureMountedAs(mountPoint, "rprivate") } // MakeSlave ensures a mounted filesystem has the SLAVE mount option enabled. 
// See the supported options in flags.go for further reference. func MakeSlave(mountPoint string) error { return ensureMountedAs(mountPoint, "slave") } // MakeRSlave ensures a mounted filesystem has the RSLAVE mount option enabled. // See the supported options in flags.go for further reference. func MakeRSlave(mountPoint string) error { return ensureMountedAs(mountPoint, "rslave") } // MakeUnbindable ensures a mounted filesystem has the UNBINDABLE mount option // enabled. See the supported options in flags.go for further reference. func MakeUnbindable(mountPoint string) error { return ensureMountedAs(mountPoint, "unbindable") } // MakeRUnbindable ensures a mounted filesystem has the RUNBINDABLE mount // option enabled. See the supported options in flags.go for further reference. func MakeRUnbindable(mountPoint string) error { return ensureMountedAs(mountPoint, "runbindable") } func ensureMountedAs(mountPoint, options string) error { mounted, err := Mounted(mountPoint) if err != nil { return err } if !mounted { if err := Mount(mountPoint, mountPoint, "none", "bind,rw"); err != nil { return err } } mounted, err = Mounted(mountPoint) if err != nil { return err } return ForceMount("", mountPoint, "none", options) } docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/symlink/000077500000000000000000000000001304443252500314055ustar00rootroot00000000000000LICENSE.APACHE000066400000000000000000000250131304443252500332540ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/symlink Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2014-2015 Docker, Inc. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. LICENSE.BSD000066400000000000000000000027251304443252500327500ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/symlinkCopyright (c) 2014-2015 The Docker & Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/symlink/README.md000066400000000000000000000004731304443252500326700ustar00rootroot00000000000000Package symlink implements EvalSymlinksInScope which is an extension of filepath.EvalSymlinks from the [Go standard library](https://golang.org/pkg/path/filepath). The code from filepath.EvalSymlinks has been adapted in fs.go. Please read the LICENSE.BSD file that governs fs.go and LICENSE.APACHE for fs_test.go. docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/symlink/fs.go000066400000000000000000000104101304443252500323400ustar00rootroot00000000000000// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE.BSD file. // This code is a modified version of path/filepath/symlink.go from the Go standard library. 
package symlink import ( "bytes" "errors" "os" "path/filepath" "strings" ) // FollowSymlinkInScope is a wrapper around evalSymlinksInScope that returns an absolute path func FollowSymlinkInScope(path, root string) (string, error) { path, err := filepath.Abs(path) if err != nil { return "", err } root, err = filepath.Abs(root) if err != nil { return "", err } return evalSymlinksInScope(path, root) } // evalSymlinksInScope will evaluate symlinks in `path` within a scope `root` and return // a result guaranteed to be contained within the scope `root`, at the time of the call. // Symlinks in `root` are not evaluated and left as-is. // Errors encountered while attempting to evaluate symlinks in path will be returned. // Non-existing paths are valid and do not constitute an error. // `path` has to contain `root` as a prefix, or else an error will be returned. // Trying to break out from `root` does not constitute an error. // // Example: // If /foo/bar -> /outside, // FollowSymlinkInScope("/foo/bar", "/foo") == "/foo/outside" instead of "/oustide" // // IMPORTANT: it is the caller's responsibility to call evalSymlinksInScope *after* relevant symlinks // are created and not to create subsequently, additional symlinks that could potentially make a // previously-safe path, unsafe. Example: if /foo/bar does not exist, evalSymlinksInScope("/foo/bar", "/foo") // would return "/foo/bar". If one makes /foo/bar a symlink to /baz subsequently, then "/foo/bar" should // no longer be considered safely contained in "/foo". 
func evalSymlinksInScope(path, root string) (string, error) { root = filepath.Clean(root) if path == root { return path, nil } if !strings.HasPrefix(path, root) { return "", errors.New("evalSymlinksInScope: " + path + " is not in " + root) } const maxIter = 255 originalPath := path // given root of "/a" and path of "/a/b/../../c" we want path to be "/b/../../c" path = path[len(root):] if root == string(filepath.Separator) { path = string(filepath.Separator) + path } if !strings.HasPrefix(path, string(filepath.Separator)) { return "", errors.New("evalSymlinksInScope: " + path + " is not in " + root) } path = filepath.Clean(path) // consume path by taking each frontmost path element, // expanding it if it's a symlink, and appending it to b var b bytes.Buffer // b here will always be considered to be the "current absolute path inside // root" when we append paths to it, we also append a slash and use // filepath.Clean after the loop to trim the trailing slash for n := 0; path != ""; n++ { if n > maxIter { return "", errors.New("evalSymlinksInScope: too many links in " + originalPath) } // find next path component, p i := strings.IndexRune(path, filepath.Separator) var p string if i == -1 { p, path = path, "" } else { p, path = path[:i], path[i+1:] } if p == "" { continue } // this takes a b.String() like "b/../" and a p like "c" and turns it // into "/b/../c" which then gets filepath.Cleaned into "/c" and then // root gets prepended and we Clean again (to remove any trailing slash // if the first Clean gave us just "/") cleanP := filepath.Clean(string(filepath.Separator) + b.String() + p) if cleanP == string(filepath.Separator) { // never Lstat "/" itself b.Reset() continue } fullP := filepath.Clean(root + cleanP) fi, err := os.Lstat(fullP) if os.IsNotExist(err) { // if p does not exist, accept it b.WriteString(p) b.WriteRune(filepath.Separator) continue } if err != nil { return "", err } if fi.Mode()&os.ModeSymlink == 0 { b.WriteString(p + string(filepath.Separator)) 
continue } // it's a symlink, put it at the front of path dest, err := os.Readlink(fullP) if err != nil { return "", err } if filepath.IsAbs(dest) { b.Reset() } path = dest + string(filepath.Separator) + path } // see note above on "fullP := ..." for why this is double-cleaned and // what's happening here return filepath.Clean(root + filepath.Clean(string(filepath.Separator)+b.String())), nil } docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term/000077500000000000000000000000001304443252500306665ustar00rootroot00000000000000tc_linux_cgo.go000066400000000000000000000020231304443252500336100ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term// +build linux,cgo package term import ( "syscall" "unsafe" ) // #include import "C" type Termios syscall.Termios // MakeRaw put the terminal connected to the given file descriptor into raw // mode and returns the previous state of the terminal so that it can be // restored. 
func MakeRaw(fd uintptr) (*State, error) { var oldState State if err := tcget(fd, &oldState.termios); err != 0 { return nil, err } newState := oldState.termios C.cfmakeraw((*C.struct_termios)(unsafe.Pointer(&newState))) newState.Oflag = newState.Oflag | C.OPOST if err := tcset(fd, &newState); err != 0 { return nil, err } return &oldState, nil } func tcget(fd uintptr, p *Termios) syscall.Errno { ret, err := C.tcgetattr(C.int(fd), (*C.struct_termios)(unsafe.Pointer(p))) if ret != 0 { return err.(syscall.Errno) } return 0 } func tcset(fd uintptr, p *Termios) syscall.Errno { ret, err := C.tcsetattr(C.int(fd), C.TCSANOW, (*C.struct_termios)(unsafe.Pointer(p))) if ret != 0 { return err.(syscall.Errno) } return 0 } docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term/tc_other.go000066400000000000000000000006371304443252500330320ustar00rootroot00000000000000// +build !windows // +build !linux !cgo package term import ( "syscall" "unsafe" ) func tcget(fd uintptr, p *Termios) syscall.Errno { _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(p))) return err } func tcset(fd uintptr, p *Termios) syscall.Errno { _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, setTermios, uintptr(unsafe.Pointer(p))) return err } docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term/term.go000066400000000000000000000043601304443252500321670ustar00rootroot00000000000000// +build !windows package term import ( "errors" "io" "os" "os/signal" "syscall" "unsafe" ) var ( ErrInvalidState = errors.New("Invalid terminal state") ) type State struct { termios Termios } type Winsize struct { Height uint16 Width uint16 x uint16 y uint16 } func StdStreams() (stdIn io.ReadCloser, stdOut, stdErr io.Writer) { return os.Stdin, os.Stdout, os.Stderr } func GetFdInfo(in interface{}) (uintptr, bool) { var inFd uintptr var isTerminalIn bool if file, ok := in.(*os.File); ok { inFd = file.Fd() isTerminalIn 
= IsTerminal(inFd) } return inFd, isTerminalIn } func GetWinsize(fd uintptr) (*Winsize, error) { ws := &Winsize{} _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(syscall.TIOCGWINSZ), uintptr(unsafe.Pointer(ws))) // Skipp errno = 0 if err == 0 { return ws, nil } return ws, err } func SetWinsize(fd uintptr, ws *Winsize) error { _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(syscall.TIOCSWINSZ), uintptr(unsafe.Pointer(ws))) // Skipp errno = 0 if err == 0 { return nil } return err } // IsTerminal returns true if the given file descriptor is a terminal. func IsTerminal(fd uintptr) bool { var termios Termios return tcget(fd, &termios) == 0 } // Restore restores the terminal connected to the given file descriptor to a // previous state. func RestoreTerminal(fd uintptr, state *State) error { if state == nil { return ErrInvalidState } if err := tcset(fd, &state.termios); err != 0 { return err } return nil } func SaveState(fd uintptr) (*State, error) { var oldState State if err := tcget(fd, &oldState.termios); err != 0 { return nil, err } return &oldState, nil } func DisableEcho(fd uintptr, state *State) error { newState := state.termios newState.Lflag &^= syscall.ECHO if err := tcset(fd, &newState); err != 0 { return err } handleInterrupt(fd, state) return nil } func SetRawTerminal(fd uintptr) (*State, error) { oldState, err := MakeRaw(fd) if err != nil { return nil, err } handleInterrupt(fd, oldState) return oldState, err } func handleInterrupt(fd uintptr, state *State) { sigchan := make(chan os.Signal, 1) signal.Notify(sigchan, os.Interrupt) go func() { _ = <-sigchan RestoreTerminal(fd, state) os.Exit(0) }() } term_windows.go000066400000000000000000000100671304443252500336630ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term// +build windows package term import ( "io" "os" "github.com/Sirupsen/logrus" "github.com/docker/docker/pkg/term/winconsole" ) // State holds the console mode 
for the terminal. type State struct { mode uint32 } // Winsize is used for window size. type Winsize struct { Height uint16 Width uint16 x uint16 y uint16 } func StdStreams() (stdIn io.ReadCloser, stdOut, stdErr io.Writer) { switch { case os.Getenv("ConEmuANSI") == "ON": // The ConEmu shell emulates ANSI well by default. return os.Stdin, os.Stdout, os.Stderr case os.Getenv("MSYSTEM") != "": // MSYS (mingw) does not emulate ANSI well. return winconsole.WinConsoleStreams() default: return winconsole.WinConsoleStreams() } } // GetFdInfo returns file descriptor and bool indicating whether the file is a terminal. func GetFdInfo(in interface{}) (uintptr, bool) { return winconsole.GetHandleInfo(in) } // GetWinsize retrieves the window size of the terminal connected to the passed file descriptor. func GetWinsize(fd uintptr) (*Winsize, error) { info, err := winconsole.GetConsoleScreenBufferInfo(fd) if err != nil { return nil, err } // TODO(azlinux): Set the pixel width / height of the console (currently unused by any caller) return &Winsize{ Width: uint16(info.Window.Right - info.Window.Left + 1), Height: uint16(info.Window.Bottom - info.Window.Top + 1), x: 0, y: 0}, nil } // SetWinsize sets the size of the given terminal connected to the passed file descriptor. func SetWinsize(fd uintptr, ws *Winsize) error { // TODO(azlinux): Implement SetWinsize logrus.Debugf("[windows] SetWinsize: WARNING -- Unsupported method invoked") return nil } // IsTerminal returns true if the given file descriptor is a terminal. func IsTerminal(fd uintptr) bool { return winconsole.IsConsole(fd) } // RestoreTerminal restores the terminal connected to the given file descriptor to a // previous state. func RestoreTerminal(fd uintptr, state *State) error { return winconsole.SetConsoleMode(fd, state.mode) } // SaveState saves the state of the terminal connected to the given file descriptor. 
func SaveState(fd uintptr) (*State, error) { mode, e := winconsole.GetConsoleMode(fd) if e != nil { return nil, e } return &State{mode}, nil } // DisableEcho disables echo for the terminal connected to the given file descriptor. // -- See http://msdn.microsoft.com/en-us/library/windows/desktop/ms683462(v=vs.85).aspx func DisableEcho(fd uintptr, state *State) error { mode := state.mode mode &^= winconsole.ENABLE_ECHO_INPUT mode |= winconsole.ENABLE_PROCESSED_INPUT | winconsole.ENABLE_LINE_INPUT // TODO(azlinux): Core code registers a goroutine to catch os.Interrupt and reset the terminal state. return winconsole.SetConsoleMode(fd, mode) } // SetRawTerminal puts the terminal connected to the given file descriptor into raw // mode and returns the previous state of the terminal so that it can be // restored. func SetRawTerminal(fd uintptr) (*State, error) { state, err := MakeRaw(fd) if err != nil { return nil, err } // TODO(azlinux): Core code registers a goroutine to catch os.Interrupt and reset the terminal state. return state, err } // MakeRaw puts the terminal connected to the given file descriptor into raw // mode and returns the previous state of the terminal so that it can be // restored. 
func MakeRaw(fd uintptr) (*State, error) { state, err := SaveState(fd) if err != nil { return nil, err } // See // -- https://msdn.microsoft.com/en-us/library/windows/desktop/ms686033(v=vs.85).aspx // -- https://msdn.microsoft.com/en-us/library/windows/desktop/ms683462(v=vs.85).aspx mode := state.mode // Disable these modes mode &^= winconsole.ENABLE_ECHO_INPUT mode &^= winconsole.ENABLE_LINE_INPUT mode &^= winconsole.ENABLE_MOUSE_INPUT mode &^= winconsole.ENABLE_WINDOW_INPUT mode &^= winconsole.ENABLE_PROCESSED_INPUT // Enable these modes mode |= winconsole.ENABLE_EXTENDED_FLAGS mode |= winconsole.ENABLE_INSERT_MODE mode |= winconsole.ENABLE_QUICK_EDIT_MODE err = winconsole.SetConsoleMode(fd, mode) if err != nil { return nil, err } return state, nil } termios_darwin.go000066400000000000000000000030401304443252500341610ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/termpackage term import ( "syscall" "unsafe" ) const ( getTermios = syscall.TIOCGETA setTermios = syscall.TIOCSETA IGNBRK = syscall.IGNBRK PARMRK = syscall.PARMRK INLCR = syscall.INLCR IGNCR = syscall.IGNCR ECHONL = syscall.ECHONL CSIZE = syscall.CSIZE ICRNL = syscall.ICRNL ISTRIP = syscall.ISTRIP PARENB = syscall.PARENB ECHO = syscall.ECHO ICANON = syscall.ICANON ISIG = syscall.ISIG IXON = syscall.IXON BRKINT = syscall.BRKINT INPCK = syscall.INPCK OPOST = syscall.OPOST CS8 = syscall.CS8 IEXTEN = syscall.IEXTEN ) type Termios struct { Iflag uint64 Oflag uint64 Cflag uint64 Lflag uint64 Cc [20]byte Ispeed uint64 Ospeed uint64 } // MakeRaw put the terminal connected to the given file descriptor into raw // mode and returns the previous state of the terminal so that it can be // restored. 
func MakeRaw(fd uintptr) (*State, error) { var oldState State if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(&oldState.termios))); err != 0 { return nil, err } newState := oldState.termios newState.Iflag &^= (IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON) newState.Oflag &^= OPOST newState.Lflag &^= (ECHO | ECHONL | ICANON | ISIG | IEXTEN) newState.Cflag &^= (CSIZE | PARENB) newState.Cflag |= CS8 newState.Cc[syscall.VMIN] = 1 newState.Cc[syscall.VTIME] = 0 if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(setTermios), uintptr(unsafe.Pointer(&newState))); err != 0 { return nil, err } return &oldState, nil } termios_freebsd.go000066400000000000000000000030401304443252500343070ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/termpackage term import ( "syscall" "unsafe" ) const ( getTermios = syscall.TIOCGETA setTermios = syscall.TIOCSETA IGNBRK = syscall.IGNBRK PARMRK = syscall.PARMRK INLCR = syscall.INLCR IGNCR = syscall.IGNCR ECHONL = syscall.ECHONL CSIZE = syscall.CSIZE ICRNL = syscall.ICRNL ISTRIP = syscall.ISTRIP PARENB = syscall.PARENB ECHO = syscall.ECHO ICANON = syscall.ICANON ISIG = syscall.ISIG IXON = syscall.IXON BRKINT = syscall.BRKINT INPCK = syscall.INPCK OPOST = syscall.OPOST CS8 = syscall.CS8 IEXTEN = syscall.IEXTEN ) type Termios struct { Iflag uint32 Oflag uint32 Cflag uint32 Lflag uint32 Cc [20]byte Ispeed uint32 Ospeed uint32 } // MakeRaw put the terminal connected to the given file descriptor into raw // mode and returns the previous state of the terminal so that it can be // restored. 
func MakeRaw(fd uintptr) (*State, error) { var oldState State if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(&oldState.termios))); err != 0 { return nil, err } newState := oldState.termios newState.Iflag &^= (IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON) newState.Oflag &^= OPOST newState.Lflag &^= (ECHO | ECHONL | ICANON | ISIG | IEXTEN) newState.Cflag &^= (CSIZE | PARENB) newState.Cflag |= CS8 newState.Cc[syscall.VMIN] = 1 newState.Cc[syscall.VTIME] = 0 if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(setTermios), uintptr(unsafe.Pointer(&newState))); err != 0 { return nil, err } return &oldState, nil } termios_linux.go000066400000000000000000000022571304443252500340450ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term// +build !cgo package term import ( "syscall" "unsafe" ) const ( getTermios = syscall.TCGETS setTermios = syscall.TCSETS ) type Termios struct { Iflag uint32 Oflag uint32 Cflag uint32 Lflag uint32 Cc [20]byte Ispeed uint32 Ospeed uint32 } // MakeRaw put the terminal connected to the given file descriptor into raw // mode and returns the previous state of the terminal so that it can be // restored. 
func MakeRaw(fd uintptr) (*State, error) { var oldState State if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, getTermios, uintptr(unsafe.Pointer(&oldState.termios))); err != 0 { return nil, err } newState := oldState.termios newState.Iflag &^= (syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON) newState.Oflag &^= syscall.OPOST newState.Lflag &^= (syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN) newState.Cflag &^= (syscall.CSIZE | syscall.PARENB) newState.Cflag |= syscall.CS8 if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, setTermios, uintptr(unsafe.Pointer(&newState))); err != 0 { return nil, err } return &oldState, nil } docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term/winconsole/000077500000000000000000000000001304443252500330465ustar00rootroot00000000000000console_windows.go000066400000000000000000001025111304443252500365320ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term/winconsole// +build windows package winconsole import ( "bytes" "fmt" "io" "os" "strconv" "strings" "sync" "syscall" "unsafe" "github.com/Sirupsen/logrus" ) const ( // Consts for Get/SetConsoleMode function // -- See https://msdn.microsoft.com/en-us/library/windows/desktop/ms686033(v=vs.85).aspx ENABLE_PROCESSED_INPUT = 0x0001 ENABLE_LINE_INPUT = 0x0002 ENABLE_ECHO_INPUT = 0x0004 ENABLE_WINDOW_INPUT = 0x0008 ENABLE_MOUSE_INPUT = 0x0010 ENABLE_INSERT_MODE = 0x0020 ENABLE_QUICK_EDIT_MODE = 0x0040 ENABLE_EXTENDED_FLAGS = 0x0080 // If parameter is a screen buffer handle, additional values ENABLE_PROCESSED_OUTPUT = 0x0001 ENABLE_WRAP_AT_EOL_OUTPUT = 0x0002 //http://msdn.microsoft.com/en-us/library/windows/desktop/ms682088(v=vs.85).aspx#_win32_character_attributes FOREGROUND_BLUE = 1 FOREGROUND_GREEN = 2 FOREGROUND_RED = 4 FOREGROUND_INTENSITY = 8 FOREGROUND_MASK_SET = 0x000F 
FOREGROUND_MASK_UNSET = 0xFFF0 BACKGROUND_BLUE = 16 BACKGROUND_GREEN = 32 BACKGROUND_RED = 64 BACKGROUND_INTENSITY = 128 BACKGROUND_MASK_SET = 0x00F0 BACKGROUND_MASK_UNSET = 0xFF0F COMMON_LVB_REVERSE_VIDEO = 0x4000 COMMON_LVB_UNDERSCORE = 0x8000 // http://man7.org/linux/man-pages/man4/console_codes.4.html // ECMA-48 Set Graphics Rendition ANSI_ATTR_RESET = 0 ANSI_ATTR_BOLD = 1 ANSI_ATTR_DIM = 2 ANSI_ATTR_UNDERLINE = 4 ANSI_ATTR_BLINK = 5 ANSI_ATTR_REVERSE = 7 ANSI_ATTR_INVISIBLE = 8 ANSI_ATTR_UNDERLINE_OFF = 24 ANSI_ATTR_BLINK_OFF = 25 ANSI_ATTR_REVERSE_OFF = 27 ANSI_ATTR_INVISIBLE_OFF = 8 ANSI_FOREGROUND_BLACK = 30 ANSI_FOREGROUND_RED = 31 ANSI_FOREGROUND_GREEN = 32 ANSI_FOREGROUND_YELLOW = 33 ANSI_FOREGROUND_BLUE = 34 ANSI_FOREGROUND_MAGENTA = 35 ANSI_FOREGROUND_CYAN = 36 ANSI_FOREGROUND_WHITE = 37 ANSI_FOREGROUND_DEFAULT = 39 ANSI_BACKGROUND_BLACK = 40 ANSI_BACKGROUND_RED = 41 ANSI_BACKGROUND_GREEN = 42 ANSI_BACKGROUND_YELLOW = 43 ANSI_BACKGROUND_BLUE = 44 ANSI_BACKGROUND_MAGENTA = 45 ANSI_BACKGROUND_CYAN = 46 ANSI_BACKGROUND_WHITE = 47 ANSI_BACKGROUND_DEFAULT = 49 ANSI_MAX_CMD_LENGTH = 256 MAX_INPUT_EVENTS = 128 MAX_INPUT_BUFFER = 1024 DEFAULT_WIDTH = 80 DEFAULT_HEIGHT = 24 ) // http://msdn.microsoft.com/en-us/library/windows/desktop/dd375731(v=vs.85).aspx const ( VK_PRIOR = 0x21 // PAGE UP key VK_NEXT = 0x22 // PAGE DOWN key VK_END = 0x23 // END key VK_HOME = 0x24 // HOME key VK_LEFT = 0x25 // LEFT ARROW key VK_UP = 0x26 // UP ARROW key VK_RIGHT = 0x27 // RIGHT ARROW key VK_DOWN = 0x28 // DOWN ARROW key VK_SELECT = 0x29 // SELECT key VK_PRINT = 0x2A // PRINT key VK_EXECUTE = 0x2B // EXECUTE key VK_SNAPSHOT = 0x2C // PRINT SCREEN key VK_INSERT = 0x2D // INS key VK_DELETE = 0x2E // DEL key VK_HELP = 0x2F // HELP key VK_F1 = 0x70 // F1 key VK_F2 = 0x71 // F2 key VK_F3 = 0x72 // F3 key VK_F4 = 0x73 // F4 key VK_F5 = 0x74 // F5 key VK_F6 = 0x75 // F6 key VK_F7 = 0x76 // F7 key VK_F8 = 0x77 // F8 key VK_F9 = 0x78 // F9 key VK_F10 = 0x79 // F10 key VK_F11 = 0x7A // 
F11 key VK_F12 = 0x7B // F12 key ) var kernel32DLL = syscall.NewLazyDLL("kernel32.dll") var ( setConsoleModeProc = kernel32DLL.NewProc("SetConsoleMode") getConsoleScreenBufferInfoProc = kernel32DLL.NewProc("GetConsoleScreenBufferInfo") setConsoleCursorPositionProc = kernel32DLL.NewProc("SetConsoleCursorPosition") setConsoleTextAttributeProc = kernel32DLL.NewProc("SetConsoleTextAttribute") fillConsoleOutputCharacterProc = kernel32DLL.NewProc("FillConsoleOutputCharacterW") writeConsoleOutputProc = kernel32DLL.NewProc("WriteConsoleOutputW") readConsoleInputProc = kernel32DLL.NewProc("ReadConsoleInputW") getNumberOfConsoleInputEventsProc = kernel32DLL.NewProc("GetNumberOfConsoleInputEvents") getConsoleCursorInfoProc = kernel32DLL.NewProc("GetConsoleCursorInfo") setConsoleCursorInfoProc = kernel32DLL.NewProc("SetConsoleCursorInfo") setConsoleWindowInfoProc = kernel32DLL.NewProc("SetConsoleWindowInfo") setConsoleScreenBufferSizeProc = kernel32DLL.NewProc("SetConsoleScreenBufferSize") ) // types for calling various windows API // see http://msdn.microsoft.com/en-us/library/windows/desktop/ms682093(v=vs.85).aspx type ( SHORT int16 BOOL int32 WORD uint16 WCHAR uint16 DWORD uint32 SMALL_RECT struct { Left SHORT Top SHORT Right SHORT Bottom SHORT } COORD struct { X SHORT Y SHORT } CONSOLE_SCREEN_BUFFER_INFO struct { Size COORD CursorPosition COORD Attributes WORD Window SMALL_RECT MaximumWindowSize COORD } CONSOLE_CURSOR_INFO struct { Size DWORD Visible BOOL } // http://msdn.microsoft.com/en-us/library/windows/desktop/ms684166(v=vs.85).aspx KEY_EVENT_RECORD struct { KeyDown BOOL RepeatCount WORD VirtualKeyCode WORD VirtualScanCode WORD UnicodeChar WCHAR ControlKeyState DWORD } INPUT_RECORD struct { EventType WORD KeyEvent KEY_EVENT_RECORD } CHAR_INFO struct { UnicodeChar WCHAR Attributes WORD } ) // TODO(azlinux): Basic type clean-up // -- Convert all uses of uintptr to syscall.Handle to be consistent with Windows syscall // -- Convert, as appropriate, types to use defined 
Windows types (e.g., DWORD instead of uint32) // Implements the TerminalEmulator interface type WindowsTerminal struct { outMutex sync.Mutex inMutex sync.Mutex inputBuffer []byte inputSize int inputEvents []INPUT_RECORD screenBufferInfo *CONSOLE_SCREEN_BUFFER_INFO inputEscapeSequence []byte } func getStdHandle(stdhandle int) uintptr { handle, err := syscall.GetStdHandle(stdhandle) if err != nil { panic(fmt.Errorf("could not get standard io handle %d", stdhandle)) } return uintptr(handle) } func WinConsoleStreams() (stdIn io.ReadCloser, stdOut, stdErr io.Writer) { handler := &WindowsTerminal{ inputBuffer: make([]byte, MAX_INPUT_BUFFER), inputEscapeSequence: []byte(KEY_ESC_CSI), inputEvents: make([]INPUT_RECORD, MAX_INPUT_EVENTS), } if IsConsole(os.Stdin.Fd()) { stdIn = &terminalReader{ wrappedReader: os.Stdin, emulator: handler, command: make([]byte, 0, ANSI_MAX_CMD_LENGTH), fd: getStdHandle(syscall.STD_INPUT_HANDLE), } } else { stdIn = os.Stdin } if IsConsole(os.Stdout.Fd()) { stdoutHandle := getStdHandle(syscall.STD_OUTPUT_HANDLE) // Save current screen buffer info screenBufferInfo, err := GetConsoleScreenBufferInfo(stdoutHandle) if err != nil { // If GetConsoleScreenBufferInfo returns a nil error, it usually means that stdout is not a TTY. // However, this is in the branch where stdout is a TTY, hence the panic. 
panic("could not get console screen buffer info") } handler.screenBufferInfo = screenBufferInfo buffer = make([]CHAR_INFO, screenBufferInfo.MaximumWindowSize.X*screenBufferInfo.MaximumWindowSize.Y) stdOut = &terminalWriter{ wrappedWriter: os.Stdout, emulator: handler, command: make([]byte, 0, ANSI_MAX_CMD_LENGTH), fd: stdoutHandle, } } else { stdOut = os.Stdout } if IsConsole(os.Stderr.Fd()) { stdErr = &terminalWriter{ wrappedWriter: os.Stderr, emulator: handler, command: make([]byte, 0, ANSI_MAX_CMD_LENGTH), fd: getStdHandle(syscall.STD_ERROR_HANDLE), } } else { stdErr = os.Stderr } return stdIn, stdOut, stdErr } // GetHandleInfo returns file descriptor and bool indicating whether the file is a console. func GetHandleInfo(in interface{}) (uintptr, bool) { var inFd uintptr var isTerminalIn bool switch t := in.(type) { case *terminalReader: in = t.wrappedReader case *terminalWriter: in = t.wrappedWriter } if file, ok := in.(*os.File); ok { inFd = file.Fd() isTerminalIn = IsConsole(inFd) } return inFd, isTerminalIn } func getError(r1, r2 uintptr, lastErr error) error { // If the function fails, the return value is zero. 
if r1 == 0 { if lastErr != nil { return lastErr } return syscall.EINVAL } return nil } // GetConsoleMode gets the console mode for given file descriptor // http://msdn.microsoft.com/en-us/library/windows/desktop/ms683167(v=vs.85).aspx func GetConsoleMode(handle uintptr) (uint32, error) { var mode uint32 err := syscall.GetConsoleMode(syscall.Handle(handle), &mode) return mode, err } // SetConsoleMode sets the console mode for given file descriptor // http://msdn.microsoft.com/en-us/library/windows/desktop/ms686033(v=vs.85).aspx func SetConsoleMode(handle uintptr, mode uint32) error { return getError(setConsoleModeProc.Call(handle, uintptr(mode), 0)) } // SetCursorVisible sets the cursor visbility // http://msdn.microsoft.com/en-us/library/windows/desktop/ms686019(v=vs.85).aspx func SetCursorVisible(handle uintptr, isVisible BOOL) (bool, error) { var cursorInfo *CONSOLE_CURSOR_INFO = &CONSOLE_CURSOR_INFO{} if err := getError(getConsoleCursorInfoProc.Call(handle, uintptr(unsafe.Pointer(cursorInfo)), 0)); err != nil { return false, err } cursorInfo.Visible = isVisible if err := getError(setConsoleCursorInfoProc.Call(handle, uintptr(unsafe.Pointer(cursorInfo)), 0)); err != nil { return false, err } return true, nil } // SetWindowSize sets the size of the console window. func SetWindowSize(handle uintptr, width, height, max SHORT) (bool, error) { window := SMALL_RECT{Left: 0, Top: 0, Right: width - 1, Bottom: height - 1} coord := COORD{X: width - 1, Y: max} if err := getError(setConsoleWindowInfoProc.Call(handle, uintptr(1), uintptr(unsafe.Pointer(&window)))); err != nil { return false, err } if err := getError(setConsoleScreenBufferSizeProc.Call(handle, marshal(coord))); err != nil { return false, err } return true, nil } // GetConsoleScreenBufferInfo retrieves information about the specified console screen buffer. 
// http://msdn.microsoft.com/en-us/library/windows/desktop/ms683171(v=vs.85).aspx func GetConsoleScreenBufferInfo(handle uintptr) (*CONSOLE_SCREEN_BUFFER_INFO, error) { var info CONSOLE_SCREEN_BUFFER_INFO if err := getError(getConsoleScreenBufferInfoProc.Call(handle, uintptr(unsafe.Pointer(&info)), 0)); err != nil { return nil, err } return &info, nil } // setConsoleTextAttribute sets the attributes of characters written to the // console screen buffer by the WriteFile or WriteConsole function, // http://msdn.microsoft.com/en-us/library/windows/desktop/ms686047(v=vs.85).aspx func setConsoleTextAttribute(handle uintptr, attribute WORD) error { return getError(setConsoleTextAttributeProc.Call(handle, uintptr(attribute), 0)) } func writeConsoleOutput(handle uintptr, buffer []CHAR_INFO, bufferSize COORD, bufferCoord COORD, writeRegion *SMALL_RECT) (bool, error) { if err := getError(writeConsoleOutputProc.Call(handle, uintptr(unsafe.Pointer(&buffer[0])), marshal(bufferSize), marshal(bufferCoord), uintptr(unsafe.Pointer(writeRegion)))); err != nil { return false, err } return true, nil } // http://msdn.microsoft.com/en-us/library/windows/desktop/ms682663(v=vs.85).aspx func fillConsoleOutputCharacter(handle uintptr, fillChar byte, length uint32, writeCord COORD) (bool, error) { out := int64(0) if err := getError(fillConsoleOutputCharacterProc.Call(handle, uintptr(fillChar), uintptr(length), marshal(writeCord), uintptr(unsafe.Pointer(&out)))); err != nil { return false, err } return true, nil } // Gets the number of space characters to write for "clearing" the section of terminal func getNumberOfChars(fromCoord COORD, toCoord COORD, screenSize COORD) uint32 { // must be valid cursor position if fromCoord.X < 0 || fromCoord.Y < 0 || toCoord.X < 0 || toCoord.Y < 0 { return 0 } if fromCoord.X >= screenSize.X || fromCoord.Y >= screenSize.Y || toCoord.X >= screenSize.X || toCoord.Y >= screenSize.Y { return 0 } // can't be backwards if fromCoord.Y > toCoord.Y { return 0 } // 
same line if fromCoord.Y == toCoord.Y { return uint32(toCoord.X-fromCoord.X) + 1 } // spans more than one line if fromCoord.Y < toCoord.Y { // from start till end of line for first line + from start of line till end retValue := uint32(screenSize.X-fromCoord.X) + uint32(toCoord.X) + 1 // don't count first and last line linesBetween := toCoord.Y - fromCoord.Y - 1 if linesBetween > 0 { retValue = retValue + uint32(linesBetween*screenSize.X) } return retValue } return 0 } var buffer []CHAR_INFO func clearDisplayRect(handle uintptr, attributes WORD, fromCoord COORD, toCoord COORD) (uint32, error) { var writeRegion SMALL_RECT writeRegion.Left = fromCoord.X writeRegion.Top = fromCoord.Y writeRegion.Right = toCoord.X writeRegion.Bottom = toCoord.Y // allocate and initialize buffer width := toCoord.X - fromCoord.X + 1 height := toCoord.Y - fromCoord.Y + 1 size := uint32(width) * uint32(height) if size > 0 { buffer := make([]CHAR_INFO, size) for i := range buffer { buffer[i] = CHAR_INFO{WCHAR(' '), attributes} } // Write to buffer r, err := writeConsoleOutput(handle, buffer, COORD{X: width, Y: height}, COORD{X: 0, Y: 0}, &writeRegion) if !r { if err != nil { return 0, err } return 0, syscall.EINVAL } } return uint32(size), nil } func clearDisplayRange(handle uintptr, attributes WORD, fromCoord COORD, toCoord COORD) (uint32, error) { nw := uint32(0) // start and end on same line if fromCoord.Y == toCoord.Y { return clearDisplayRect(handle, attributes, fromCoord, toCoord) } // TODO(azlinux): if full screen, optimize // spans more than one line if fromCoord.Y < toCoord.Y { // from start position till end of line for first line n, err := clearDisplayRect(handle, attributes, fromCoord, COORD{X: toCoord.X, Y: fromCoord.Y}) if err != nil { return nw, err } nw += n // lines between linesBetween := toCoord.Y - fromCoord.Y - 1 if linesBetween > 0 { n, err = clearDisplayRect(handle, attributes, COORD{X: 0, Y: fromCoord.Y + 1}, COORD{X: toCoord.X, Y: toCoord.Y - 1}) if err != nil { 
return nw, err } nw += n } // lines at end n, err = clearDisplayRect(handle, attributes, COORD{X: 0, Y: toCoord.Y}, toCoord) if err != nil { return nw, err } nw += n } return nw, nil } // setConsoleCursorPosition sets the console cursor position // Note The X and Y are zero based // If relative is true then the new position is relative to current one func setConsoleCursorPosition(handle uintptr, isRelative bool, column int16, line int16) error { screenBufferInfo, err := GetConsoleScreenBufferInfo(handle) if err != nil { return err } var position COORD if isRelative { position.X = screenBufferInfo.CursorPosition.X + SHORT(column) position.Y = screenBufferInfo.CursorPosition.Y + SHORT(line) } else { position.X = SHORT(column) position.Y = SHORT(line) } return getError(setConsoleCursorPositionProc.Call(handle, marshal(position), 0)) } // http://msdn.microsoft.com/en-us/library/windows/desktop/ms683207(v=vs.85).aspx func getNumberOfConsoleInputEvents(handle uintptr) (uint16, error) { var n DWORD if err := getError(getNumberOfConsoleInputEventsProc.Call(handle, uintptr(unsafe.Pointer(&n)))); err != nil { return 0, err } return uint16(n), nil } //http://msdn.microsoft.com/en-us/library/windows/desktop/ms684961(v=vs.85).aspx func readConsoleInputKey(handle uintptr, inputBuffer []INPUT_RECORD) (int, error) { var nr DWORD if err := getError(readConsoleInputProc.Call(handle, uintptr(unsafe.Pointer(&inputBuffer[0])), uintptr(len(inputBuffer)), uintptr(unsafe.Pointer(&nr)))); err != nil { return 0, err } return int(nr), nil } func getWindowsTextAttributeForAnsiValue(originalFlag WORD, defaultValue WORD, ansiValue int16) (WORD, error) { flag := WORD(originalFlag) if flag == 0 { flag = defaultValue } switch ansiValue { case ANSI_ATTR_RESET: flag &^= COMMON_LVB_UNDERSCORE flag &^= BACKGROUND_INTENSITY flag = flag | FOREGROUND_INTENSITY case ANSI_ATTR_INVISIBLE: // TODO: how do you reset reverse? 
case ANSI_ATTR_UNDERLINE: flag = flag | COMMON_LVB_UNDERSCORE case ANSI_ATTR_BLINK: // seems like background intenisty is blink flag = flag | BACKGROUND_INTENSITY case ANSI_ATTR_UNDERLINE_OFF: flag &^= COMMON_LVB_UNDERSCORE case ANSI_ATTR_BLINK_OFF: // seems like background intenisty is blink flag &^= BACKGROUND_INTENSITY case ANSI_ATTR_BOLD: flag = flag | FOREGROUND_INTENSITY case ANSI_ATTR_DIM: flag &^= FOREGROUND_INTENSITY case ANSI_ATTR_REVERSE, ANSI_ATTR_REVERSE_OFF: // swap forground and background bits foreground := flag & FOREGROUND_MASK_SET background := flag & BACKGROUND_MASK_SET flag = (flag & BACKGROUND_MASK_UNSET & FOREGROUND_MASK_UNSET) | (foreground << 4) | (background >> 4) // FOREGROUND case ANSI_FOREGROUND_DEFAULT: flag = (flag & FOREGROUND_MASK_UNSET) | (defaultValue & FOREGROUND_MASK_SET) case ANSI_FOREGROUND_BLACK: flag = flag ^ (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE) case ANSI_FOREGROUND_RED: flag = (flag & FOREGROUND_MASK_UNSET) | FOREGROUND_RED case ANSI_FOREGROUND_GREEN: flag = (flag & FOREGROUND_MASK_UNSET) | FOREGROUND_GREEN case ANSI_FOREGROUND_YELLOW: flag = (flag & FOREGROUND_MASK_UNSET) | FOREGROUND_RED | FOREGROUND_GREEN case ANSI_FOREGROUND_BLUE: flag = (flag & FOREGROUND_MASK_UNSET) | FOREGROUND_BLUE case ANSI_FOREGROUND_MAGENTA: flag = (flag & FOREGROUND_MASK_UNSET) | FOREGROUND_RED | FOREGROUND_BLUE case ANSI_FOREGROUND_CYAN: flag = (flag & FOREGROUND_MASK_UNSET) | FOREGROUND_GREEN | FOREGROUND_BLUE case ANSI_FOREGROUND_WHITE: flag = (flag & FOREGROUND_MASK_UNSET) | FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE // Background case ANSI_BACKGROUND_DEFAULT: // Black with no intensity flag = (flag & BACKGROUND_MASK_UNSET) | (defaultValue & BACKGROUND_MASK_SET) case ANSI_BACKGROUND_BLACK: flag = (flag & BACKGROUND_MASK_UNSET) case ANSI_BACKGROUND_RED: flag = (flag & BACKGROUND_MASK_UNSET) | BACKGROUND_RED case ANSI_BACKGROUND_GREEN: flag = (flag & BACKGROUND_MASK_UNSET) | BACKGROUND_GREEN case 
ANSI_BACKGROUND_YELLOW: flag = (flag & BACKGROUND_MASK_UNSET) | BACKGROUND_RED | BACKGROUND_GREEN case ANSI_BACKGROUND_BLUE: flag = (flag & BACKGROUND_MASK_UNSET) | BACKGROUND_BLUE case ANSI_BACKGROUND_MAGENTA: flag = (flag & BACKGROUND_MASK_UNSET) | BACKGROUND_RED | BACKGROUND_BLUE case ANSI_BACKGROUND_CYAN: flag = (flag & BACKGROUND_MASK_UNSET) | BACKGROUND_GREEN | BACKGROUND_BLUE case ANSI_BACKGROUND_WHITE: flag = (flag & BACKGROUND_MASK_UNSET) | BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE } return flag, nil } // HandleOutputCommand interpretes the Ansi commands and then makes appropriate Win32 calls func (term *WindowsTerminal) HandleOutputCommand(handle uintptr, command []byte) (n int, err error) { // always consider all the bytes in command, processed n = len(command) parsedCommand := parseAnsiCommand(command) logrus.Debugf("[windows] HandleOutputCommand: %v", parsedCommand) // console settings changes need to happen in atomic way term.outMutex.Lock() defer term.outMutex.Unlock() switch parsedCommand.Command { case "m": // [Value;...;Valuem // Set Graphics Mode: // Calls the graphics functions specified by the following values. // These specified functions remain active until the next occurrence of this escape sequence. // Graphics mode changes the colors and attributes of text (such as bold and underline) displayed on the screen. 
screenBufferInfo, err := GetConsoleScreenBufferInfo(handle)
	if err != nil {
		return n, err
	}
	// Fold every SGR parameter into the attribute word in sequence;
	// parameters within one command apply cumulatively.
	flag := screenBufferInfo.Attributes
	for _, e := range parsedCommand.Parameters {
		value, _ := strconv.ParseInt(e, 10, 16) // base 10, 16 bit
		if value == ANSI_ATTR_RESET {
			// SGR 0: restore the attributes captured at terminal setup.
			flag = term.screenBufferInfo.Attributes // reset
		} else {
			flag, err = getWindowsTextAttributeForAnsiValue(flag, term.screenBufferInfo.Attributes, int16(value))
			if err != nil {
				return n, err
			}
		}
	}
	if err := setConsoleTextAttribute(handle, flag); err != nil {
		return n, err
	}
case "H", "f":
	// [line;columnH
	// [line;columnf
	// Moves the cursor to the specified position (coordinates).
	// If you do not specify a position, the cursor moves to the home position at the upper-left corner of the screen (line 0, column 0).
	screenBufferInfo, err := GetConsoleScreenBufferInfo(handle)
	if err != nil {
		return n, err
	}
	// Missing parameters default to 1 (ANSI positions are 1-based).
	line, err := parseInt16OrDefault(parsedCommand.getParam(0), 1)
	if err != nil {
		return n, err
	}
	// Clamp the target to the visible window.
	if line > int16(screenBufferInfo.Window.Bottom) {
		line = int16(screenBufferInfo.Window.Bottom) + 1
	}
	column, err := parseInt16OrDefault(parsedCommand.getParam(1), 1)
	if err != nil {
		return n, err
	}
	if column > int16(screenBufferInfo.Window.Right) {
		column = int16(screenBufferInfo.Window.Right) + 1
	}
	// The numbers are not 0 based, but 1 based
	logrus.Debugf("[windows] HandleOutputCommmand: Moving cursor to (%v,%v)", column-1, line-1)
	// NOTE(review): the second argument appears to select relative (true) vs
	// absolute (false) positioning — confirm against setConsoleCursorPosition.
	if err := setConsoleCursorPosition(handle, false, column-1, line-1); err != nil {
		return n, err
	}
case "A":
	// [valueA
	// Moves the cursor up by the specified number of lines without changing columns.
	// If the cursor is already on the top line, ignores this sequence.
	value, err := parseInt16OrDefault(parsedCommand.getParam(0), 1)
	if err != nil {
		// NOTE(review): returns len(command) where the sibling cases return n;
		// the values are identical (n = len(command)), but this is inconsistent.
		return len(command), err
	}
	if err := setConsoleCursorPosition(handle, true, 0, -value); err != nil {
		return n, err
	}
case "B":
	// [valueB
	// Moves the cursor down by the specified number of lines without changing columns.
	// If the cursor is already on the bottom line, ignores this sequence.
	value, err := parseInt16OrDefault(parsedCommand.getParam(0), 1)
	if err != nil {
		return n, err
	}
	if err := setConsoleCursorPosition(handle, true, 0, value); err != nil {
		return n, err
	}
case "C":
	// [valueC
	// Moves the cursor forward by the specified number of columns without changing lines.
	// If the cursor is already in the rightmost column, ignores this sequence.
	value, err := parseInt16OrDefault(parsedCommand.getParam(0), 1)
	if err != nil {
		return n, err
	}
	if err := setConsoleCursorPosition(handle, true, value, 0); err != nil {
		return n, err
	}
case "D":
	// [valueD
	// Moves the cursor back by the specified number of columns without changing lines.
	// If the cursor is already in the leftmost column, ignores this sequence.
	value, err := parseInt16OrDefault(parsedCommand.getParam(0), 1)
	if err != nil {
		return n, err
	}
	if err := setConsoleCursorPosition(handle, true, -value, 0); err != nil {
		return n, err
	}
case "J":
	// [J Erases from the cursor to the end of the screen, including the cursor position.
	// [1J Erases from the beginning of the screen to the cursor, including the cursor position.
	// [2J Erases the complete display. The cursor does not move.
	// Clears the screen and moves the cursor to the home position (line 0, column 0).
	value, err := parseInt16OrDefault(parsedCommand.getParam(0), 0)
	if err != nil {
		return n, err
	}
	var start COORD
	var cursor COORD
	var end COORD
	screenBufferInfo, err := GetConsoleScreenBufferInfo(handle)
	if err != nil {
		return n, err
	}
	// Compute the [start, end] cell range to clear, and where the cursor lands.
	switch value {
	case 0:
		// from the cursor ...
		start = screenBufferInfo.CursorPosition
		// ... to the end of the buffer
		end.X = screenBufferInfo.Size.X - 1
		end.Y = screenBufferInfo.Size.Y - 1
		// cursor
		cursor = screenBufferInfo.CursorPosition
	case 1:
		// start of the screen
		start.X = 0
		start.Y = 0
		// end of the screen
		end = screenBufferInfo.CursorPosition
		// cursor
		cursor = screenBufferInfo.CursorPosition
	case 2:
		// start of the screen
		start.X = 0
		start.Y = 0
		// end of the buffer
		end.X = screenBufferInfo.Size.X - 1
		end.Y = screenBufferInfo.Size.Y - 1
		// cursor
		cursor.X = 0
		cursor.Y = 0
	}
	if _, err := clearDisplayRange(uintptr(handle), term.screenBufferInfo.Attributes, start, end); err != nil {
		return n, err
	}
	// remember that the cursor position is 1 based
	if err := setConsoleCursorPosition(handle, false, int16(cursor.X), int16(cursor.Y)); err != nil {
		return n, err
	}
case "K":
	// [K
	// Clears all characters from the cursor position to the end of the line (including the character at the cursor position).
	// [K Erases from the cursor to the end of the line, including the cursor position.
	// [1K Erases from the beginning of the line to the cursor, including the cursor position.
	// [2K Erases the complete line.
	// NOTE(review): unlike case "J", the error returned by parseInt16OrDefault
	// is never checked here — on a malformed parameter the default is used and
	// the error is silently dropped.
	value, err := parseInt16OrDefault(parsedCommand.getParam(0), 0)
	var start COORD
	var cursor COORD
	var end COORD
	screenBufferInfo, err := GetConsoleScreenBufferInfo(uintptr(handle))
	if err != nil {
		return n, err
	}
	switch value {
	case 0:
		// start is where cursor is
		start = screenBufferInfo.CursorPosition
		// end of line
		end.X = screenBufferInfo.Size.X - 1
		end.Y = screenBufferInfo.CursorPosition.Y
		// cursor remains the same
		cursor = screenBufferInfo.CursorPosition
	case 1:
		// beginning of line
		start.X = 0
		start.Y = screenBufferInfo.CursorPosition.Y
		// until cursor
		end = screenBufferInfo.CursorPosition
		// cursor remains the same
		cursor = screenBufferInfo.CursorPosition
	case 2:
		// NOTE(review): cases 0 and 1 operate on CursorPosition.Y, but this
		// branch uses CursorPosition.Y - 1 throughout, i.e. it clears the line
		// ABOVE the cursor — confirm this is intended.
		// start of the line
		start.X = 0
		start.Y = screenBufferInfo.CursorPosition.Y - 1
		// end of the line
		end.X = screenBufferInfo.Size.X - 1
		end.Y = screenBufferInfo.CursorPosition.Y - 1
		// cursor
		cursor.X = 0
		cursor.Y = screenBufferInfo.CursorPosition.Y - 1
	}
	if _, err := clearDisplayRange(uintptr(handle), term.screenBufferInfo.Attributes, start, end); err != nil {
		return n, err
	}
	// remember that the cursor position is 1 based
	if err := setConsoleCursorPosition(uintptr(handle), false, int16(cursor.X), int16(cursor.Y)); err != nil {
		return n, err
	}
case "l":
	// Reset mode ("low"): handle the DEC private / ANSI modes we support.
	for _, value := range parsedCommand.Parameters {
		switch value {
		case "?25", "25":
			// Hide the cursor.
			SetCursorVisible(uintptr(handle), BOOL(0))
		case "?1049", "1049":
			// TODO (azlinux): Restore terminal
		case "?1", "1":
			// If the DECCKM function is reset, then the arrow keys send ANSI cursor sequences to the host.
			term.inputEscapeSequence = []byte(KEY_ESC_CSI)
		}
	}
case "h":
	// Set mode ("high"): handle the DEC private / ANSI modes we support.
	for _, value := range parsedCommand.Parameters {
		switch value {
		case "?25", "25":
			// Show the cursor.
			SetCursorVisible(uintptr(handle), BOOL(1))
		case "?1049", "1049":
			// TODO (azlinux): Save terminal
		case "?1", "1":
			// If the DECCKM function is set, then the arrow keys send application sequences to the host.
			// DECCKM (default off): When set, the cursor keys send an ESC O prefix, rather than ESC [.
term.inputEscapeSequence = []byte(KEY_ESC_O) } } case "]": /* TODO (azlinux): Linux Console Private CSI Sequences The following sequences are neither ECMA-48 nor native VT102. They are native to the Linux console driver. Colors are in SGR parameters: 0 = black, 1 = red, 2 = green, 3 = brown, 4 = blue, 5 = magenta, 6 = cyan, 7 = white. ESC [ 1 ; n ] Set color n as the underline color ESC [ 2 ; n ] Set color n as the dim color ESC [ 8 ] Make the current color pair the default attributes. ESC [ 9 ; n ] Set screen blank timeout to n minutes. ESC [ 10 ; n ] Set bell frequency in Hz. ESC [ 11 ; n ] Set bell duration in msec. ESC [ 12 ; n ] Bring specified console to the front. ESC [ 13 ] Unblank the screen. ESC [ 14 ; n ] Set the VESA powerdown interval in minutes. */ } return n, nil } // WriteChars writes the bytes to given writer. func (term *WindowsTerminal) WriteChars(fd uintptr, w io.Writer, p []byte) (n int, err error) { if len(p) == 0 { return 0, nil } return w.Write(p) } const ( CAPSLOCK_ON = 0x0080 //The CAPS LOCK light is on. ENHANCED_KEY = 0x0100 //The key is enhanced. LEFT_ALT_PRESSED = 0x0002 //The left ALT key is pressed. LEFT_CTRL_PRESSED = 0x0008 //The left CTRL key is pressed. NUMLOCK_ON = 0x0020 //The NUM LOCK light is on. RIGHT_ALT_PRESSED = 0x0001 //The right ALT key is pressed. RIGHT_CTRL_PRESSED = 0x0004 //The right CTRL key is pressed. SCROLLLOCK_ON = 0x0040 //The SCROLL LOCK light is on. SHIFT_PRESSED = 0x0010 // The SHIFT key is pressed. 
)

// xterm-style modifier parameters (";2" .. ";8") encoding SHIFT/ALT/CTRL
// combinations, plus common escape-sequence prefixes.
const (
	KEY_CONTROL_PARAM_2 = ";2"
	KEY_CONTROL_PARAM_3 = ";3"
	KEY_CONTROL_PARAM_4 = ";4"
	KEY_CONTROL_PARAM_5 = ";5"
	KEY_CONTROL_PARAM_6 = ";6"
	KEY_CONTROL_PARAM_7 = ";7"
	KEY_CONTROL_PARAM_8 = ";8"
	KEY_ESC_CSI         = "\x1B["
	KEY_ESC_N           = "\x1BN"
	KEY_ESC_O           = "\x1BO"
)

// keyMapPrefix maps virtual-key codes to ANSI escape-sequence templates; the
// %s placeholder receives the modifier parameter (";2" .. ";8", or "").
var keyMapPrefix = map[WORD]string{
	VK_UP:     "\x1B[%sA",
	VK_DOWN:   "\x1B[%sB",
	VK_RIGHT:  "\x1B[%sC",
	VK_LEFT:   "\x1B[%sD",
	VK_HOME:   "\x1B[1%s~", // showkey shows ^[[1
	VK_END:    "\x1B[4%s~", // showkey shows ^[[4
	VK_INSERT: "\x1B[2%s~",
	VK_DELETE: "\x1B[3%s~",
	VK_PRIOR:  "\x1B[5%s~",
	VK_NEXT:   "\x1B[6%s~",
	VK_F1:     "",
	VK_F2:     "",
	VK_F3:     "\x1B[13%s~",
	VK_F4:     "\x1B[14%s~",
	VK_F5:     "\x1B[15%s~",
	VK_F6:     "\x1B[17%s~",
	VK_F7:     "\x1B[18%s~",
	VK_F8:     "\x1B[19%s~",
	VK_F9:     "\x1B[20%s~",
	VK_F10:    "\x1B[21%s~",
	VK_F11:    "\x1B[23%s~",
	VK_F12:    "\x1B[24%s~",
}

// arrowKeyMapPrefix maps the arrow keys to templates that take two values:
// the escape prefix (CSI or ESC O, depending on DECCKM) and the modifier.
var arrowKeyMapPrefix = map[WORD]string{
	VK_UP:    "%s%sA",
	VK_DOWN:  "%s%sB",
	VK_RIGHT: "%s%sC",
	VK_LEFT:  "%s%sD",
}

// getControlStateParameter returns the xterm modifier parameter (";2" .. ";8")
// for the pressed modifier-key combination, or "" when none applies.
// NOTE(review): the meta parameter is accepted but never consulted.
func getControlStateParameter(shift, alt, control, meta bool) string {
	if shift && alt && control {
		return KEY_CONTROL_PARAM_8
	}
	if alt && control {
		return KEY_CONTROL_PARAM_7
	}
	if shift && control {
		return KEY_CONTROL_PARAM_6
	}
	if control {
		return KEY_CONTROL_PARAM_5
	}
	if shift && alt {
		return KEY_CONTROL_PARAM_4
	}
	if alt {
		return KEY_CONTROL_PARAM_3
	}
	if shift {
		return KEY_CONTROL_PARAM_2
	}
	return ""
}

// getControlKeys decodes the Win32 control-key-state bitmask into
// shift/alt/control booleans (either the left or the right key counts).
func getControlKeys(controlState DWORD) (shift, alt, control bool) {
	shift = 0 != (controlState & SHIFT_PRESSED)
	alt = 0 != (controlState & (LEFT_ALT_PRESSED | RIGHT_ALT_PRESSED))
	control = 0 != (controlState & (LEFT_CTRL_PRESSED | RIGHT_CTRL_PRESSED))
	return shift, alt, control
}

// charSequenceForKeys returns the ANSI escape sequence for a non-character
// virtual key (arrows, function keys, Home/End, ...), or "" if unmapped.
func charSequenceForKeys(key WORD, controlState DWORD, escapeSequence []byte) string {
	i, ok := arrowKeyMapPrefix[key]
	if ok {
		shift, alt, control := getControlKeys(controlState)
		modifier := getControlStateParameter(shift, alt, control, false)
		return fmt.Sprintf(i, escapeSequence, modifier)
	}
	i, ok = keyMapPrefix[key]
	if ok {
		shift, alt, control := getControlKeys(controlState)
		modifier := getControlStateParameter(shift, alt, control, false)
		return fmt.Sprintf(i, modifier)
	}
	return ""
}

// mapKeystokeToTerminalString maps the given input event record to string
func mapKeystokeToTerminalString(keyEvent *KEY_EVENT_RECORD, escapeSequence []byte) string {
	_, alt, control := getControlKeys(keyEvent.ControlKeyState)
	if keyEvent.UnicodeChar == 0 {
		// No printable character: translate the virtual key instead.
		return charSequenceForKeys(keyEvent.VirtualKeyCode, keyEvent.ControlKeyState, escapeSequence)
	}
	if control {
		// TODO(azlinux): Implement following control sequences
		// Ctrl-D Signals the end of input from the keyboard; also exits current shell.
		// Ctrl-H Deletes the first character to the left of the cursor. Also called the ERASE key.
		// Ctrl-Q Restarts printing after it has been stopped with Ctrl-S.
		// Ctrl-S Suspends printing on the screen (does not stop the program).
		// Ctrl-U Deletes all characters on the current line. Also called the KILL key.
		// Ctrl-E Quits current command and creates a core
	}
	// Alt+Key generates ESC N Key
	if !control && alt {
		return KEY_ESC_N + strings.ToLower(string(keyEvent.UnicodeChar))
	}
	return string(keyEvent.UnicodeChar)
}

// getAvailableInputEvents polls the console for available events
// The function does not return until at least one input record has been read.
func getAvailableInputEvents(handle uintptr, inputEvents []INPUT_RECORD) (n int, err error) {
	// TODO(azlinux): Why is there a for loop? Seems to me, that `n` cannot be negative. - tibor
	// NOTE(review): with the condition `n >= 0` the body returns on the first
	// iteration, so the loop never actually repeats.
	for {
		// Read number of console events available
		n, err = readConsoleInputKey(handle, inputEvents)
		if err != nil || n >= 0 {
			return n, err
		}
	}
}

// getTranslatedKeyCodes converts the input events into the string of characters
// The ansi escape sequence are used to map key strokes to the strings
func getTranslatedKeyCodes(inputEvents []INPUT_RECORD, escapeSequence []byte) string {
	var buf bytes.Buffer
	// Only key-down key events produce output; everything else is skipped.
	for i := 0; i < len(inputEvents); i++ {
		input := inputEvents[i]
		if input.EventType == KEY_EVENT && input.KeyEvent.KeyDown != 0 {
			keyString := mapKeystokeToTerminalString(&input.KeyEvent, escapeSequence)
			buf.WriteString(keyString)
		}
	}
	return buf.String()
}

// ReadChars reads the characters from the given reader
func (term *WindowsTerminal) ReadChars(fd uintptr, r io.Reader, p []byte) (n int, err error) {
	// Block until at least one translated character is buffered.
	for term.inputSize == 0 {
		nr, err := getAvailableInputEvents(fd, term.inputEvents)
		if nr == 0 && nil != err {
			return n, err
		}
		if nr > 0 {
			keyCodes := getTranslatedKeyCodes(term.inputEvents[:nr], term.inputEscapeSequence)
			term.inputSize = copy(term.inputBuffer, keyCodes)
		}
	}
	// NOTE(review): when len(p) < term.inputSize, inputSize is decremented but
	// the undelivered bytes are not shifted to the front of inputBuffer, so a
	// subsequent call would re-read from the buffer start — confirm whether
	// partial reads can occur here.
	n = copy(p, term.inputBuffer[:term.inputSize])
	term.inputSize -= n
	return n, nil
}

// HandleInputSequence interprets the input sequence command
// (currently a no-op that reports zero bytes consumed).
func (term *WindowsTerminal) HandleInputSequence(fd uintptr, command []byte) (n int, err error) {
	return 0, nil
}

// marshal reinterprets a COORD (two packed int16s) as a DWORD-sized uintptr,
// the form the Win32 console APIs expect for coordinate arguments.
func marshal(c COORD) uintptr {
	return uintptr(*((*DWORD)(unsafe.Pointer(&c))))
}

// IsConsole returns true if the given file descriptor is a terminal.
// -- The code assumes that GetConsoleMode will return an error for file descriptors that are not a console.
func IsConsole(fd uintptr) bool { _, e := GetConsoleMode(fd) return e == nil } term_emulator.go000066400000000000000000000144531304443252500362040ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/docker/docker/pkg/term/winconsolepackage winconsole import ( "fmt" "io" "strconv" "strings" ) // http://manpages.ubuntu.com/manpages/intrepid/man4/console_codes.4.html const ( ANSI_ESCAPE_PRIMARY = 0x1B ANSI_ESCAPE_SECONDARY = 0x5B ANSI_COMMAND_FIRST = 0x40 ANSI_COMMAND_LAST = 0x7E ANSI_PARAMETER_SEP = ";" ANSI_CMD_G0 = '(' ANSI_CMD_G1 = ')' ANSI_CMD_G2 = '*' ANSI_CMD_G3 = '+' ANSI_CMD_DECPNM = '>' ANSI_CMD_DECPAM = '=' ANSI_CMD_OSC = ']' ANSI_CMD_STR_TERM = '\\' ANSI_BEL = 0x07 KEY_EVENT = 1 ) // Interface that implements terminal handling type terminalEmulator interface { HandleOutputCommand(fd uintptr, command []byte) (n int, err error) HandleInputSequence(fd uintptr, command []byte) (n int, err error) WriteChars(fd uintptr, w io.Writer, p []byte) (n int, err error) ReadChars(fd uintptr, w io.Reader, p []byte) (n int, err error) } type terminalWriter struct { wrappedWriter io.Writer emulator terminalEmulator command []byte inSequence bool fd uintptr } type terminalReader struct { wrappedReader io.ReadCloser emulator terminalEmulator command []byte inSequence bool fd uintptr } // http://manpages.ubuntu.com/manpages/intrepid/man4/console_codes.4.html func isAnsiCommandChar(b byte) bool { switch { case ANSI_COMMAND_FIRST <= b && b <= ANSI_COMMAND_LAST && b != ANSI_ESCAPE_SECONDARY: return true case b == ANSI_CMD_G1 || b == ANSI_CMD_OSC || b == ANSI_CMD_DECPAM || b == ANSI_CMD_DECPNM: // non-CSI escape sequence terminator return true case b == ANSI_CMD_STR_TERM || b == ANSI_BEL: // String escape sequence terminator return true } return false } func isCharacterSelectionCmdChar(b byte) bool { return (b == ANSI_CMD_G0 || b == ANSI_CMD_G1 || b == ANSI_CMD_G2 || b == ANSI_CMD_G3) } func isXtermOscSequence(command []byte, current byte) 
bool {
	return (len(command) >= 2 && command[0] == ANSI_ESCAPE_PRIMARY && command[1] == ANSI_CMD_OSC && current != ANSI_BEL)
}

// Write writes len(p) bytes from p to the underlying data stream.
// Escape sequences are accumulated in tw.command and handed to the emulator's
// HandleOutputCommand; all other bytes go through the emulator's WriteChars.
// http://golang.org/pkg/io/#Writer
func (tw *terminalWriter) Write(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}
	if tw.emulator == nil {
		// No emulation requested: pass the bytes straight through.
		return tw.wrappedWriter.Write(p)
	}
	// Emulate terminal by extracting commands and executing them
	totalWritten := 0
	start := 0 // indicates start of the next chunk
	end := len(p)
	for current := 0; current < end; current++ {
		if tw.inSequence {
			// inside escape sequence
			tw.command = append(tw.command, p[current])
			if isAnsiCommandChar(p[current]) {
				if !isXtermOscSequence(tw.command, p[current]) {
					// found the last command character.
					// Now we have a complete command.
					nchar, err := tw.emulator.HandleOutputCommand(tw.fd, tw.command)
					totalWritten += nchar
					if err != nil {
						return totalWritten, err
					}
					// clear the command
					// don't include current character again
					tw.command = tw.command[:0]
					start = current + 1
					tw.inSequence = false
				}
			}
		} else {
			if p[current] == ANSI_ESCAPE_PRIMARY {
				// entering escape sequence
				tw.inSequence = true
				// indicates end of "normal sequence", write whatever you have so far
				if len(p[start:current]) > 0 {
					nw, err := tw.emulator.WriteChars(tw.fd, tw.wrappedWriter, p[start:current])
					totalWritten += nw
					if err != nil {
						return totalWritten, err
					}
				}
				// include the current character as part of the next sequence
				tw.command = append(tw.command, p[current])
			}
		}
	}
	// note that so far, start of the escape sequence triggers writing out of bytes to console.
	// For the part _after_ the end of last escape sequence, it is not written out yet. So write it out
	if !tw.inSequence {
		// assumption is that we can't be inside sequence and therefore command should be empty
		if len(p[start:]) > 0 {
			nw, err := tw.emulator.WriteChars(tw.fd, tw.wrappedWriter, p[start:])
			totalWritten += nw
			if err != nil {
				return totalWritten, err
			}
		}
	}
	return totalWritten, nil
}

// Read reads up to len(p) bytes into p.
// http://golang.org/pkg/io/#Reader
func (tr *terminalReader) Read(p []byte) (n int, err error) {
	// Implementations of Read are discouraged from returning a zero byte count
	// with a nil error, except when len(p) == 0.
	if len(p) == 0 {
		return 0, nil
	}
	if nil == tr.emulator {
		return tr.readFromWrappedReader(p)
	}
	return tr.emulator.ReadChars(tr.fd, tr.wrappedReader, p)
}

// Close the underlying stream
func (tr *terminalReader) Close() (err error) {
	return tr.wrappedReader.Close()
}

// readFromWrappedReader forwards the read to the wrapped stream unchanged.
func (tr *terminalReader) readFromWrappedReader(p []byte) (n int, err error) {
	return tr.wrappedReader.Read(p)
}

// ansiCommand is one parsed escape sequence.
type ansiCommand struct {
	CommandBytes []byte   // raw bytes of the sequence, including the leading ESC
	Command      string   // the final command character (whole sequence for special commands)
	Parameters   []string // ";"-separated parameters between the prefix and command char
	IsSpecial    bool     // true for character-set selection commands
}

// parseAnsiCommand splits a complete escape sequence into its command
// character and parameters.
// NOTE(review): command[1] is indexed unconditionally, so this panics if the
// sequence is shorter than two bytes — confirm callers always pass ESC plus
// at least one byte.
func parseAnsiCommand(command []byte) *ansiCommand {
	if isCharacterSelectionCmdChar(command[1]) {
		// Is Character Set Selection commands
		return &ansiCommand{
			CommandBytes: command,
			Command:      string(command),
			IsSpecial:    true,
		}
	}
	// last char is command character
	lastCharIndex := len(command) - 1
	retValue := &ansiCommand{
		CommandBytes: command,
		Command:      string(command[lastCharIndex]),
		IsSpecial:    false,
	}
	// more than a single escape
	if lastCharIndex != 0 {
		start := 1
		// skip if double char escape sequence
		if command[0] == ANSI_ESCAPE_PRIMARY && command[1] == ANSI_ESCAPE_SECONDARY {
			start++
		}
		// convert this to GetNextParam method
		retValue.Parameters = strings.Split(string(command[start:lastCharIndex]), ANSI_PARAMETER_SEP)
	}
	return retValue
}

// getParam returns the parameter at index, or "" when absent.
func (c *ansiCommand) getParam(index int) string {
	if len(c.Parameters) > index {
		return c.Parameters[index]
	}
	return ""
}

// String renders the command bytes in hex plus its parameters, for debug logging.
func (ac *ansiCommand) String() string {
	return fmt.Sprintf("0x%v \"%v\"
(\"%v\")", bytesToHex(ac.CommandBytes), ac.Command, strings.Join(ac.Parameters, "\",\"")) } func bytesToHex(b []byte) string { hex := make([]string, len(b)) for i, ch := range b { hex[i] = fmt.Sprintf("%X", ch) } return strings.Join(hex, "") } func parseInt16OrDefault(s string, defaultValue int16) (n int16, err error) { if s == "" { return defaultValue, nil } parsedValue, err := strconv.ParseInt(s, 10, 16) if err != nil { return defaultValue, err } return int16(parsedValue), nil } docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/opencontainers/000077500000000000000000000000001304443252500274475ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/000077500000000000000000000000001304443252500320625ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/LICENSE000066400000000000000000000250171304443252500330740ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2015 The Linux Foundation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
specs-go/000077500000000000000000000000001304443252500335235ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/opencontainers/runtime-specconfig.go000066400000000000000000000526031304443252500353250ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/specs-gopackage specs import "os" // Spec is the base configuration for the container. type Spec struct { // Version of the Open Container Runtime Specification with which the bundle complies. Version string `json:"ociVersion"` // Platform specifies the configuration's target platform. Platform Platform `json:"platform"` // Process configures the container process. Process Process `json:"process"` // Root configures the container's root filesystem. Root Root `json:"root"` // Hostname configures the container's hostname. Hostname string `json:"hostname,omitempty"` // Mounts configures additional mounts (on top of Root). Mounts []Mount `json:"mounts,omitempty"` // Hooks configures callbacks for container lifecycle events. Hooks Hooks `json:"hooks"` // Annotations contains arbitrary metadata for the container. Annotations map[string]string `json:"annotations,omitempty"` // Linux is platform specific configuration for Linux based containers. Linux *Linux `json:"linux,omitempty" platform:"linux"` // Solaris is platform specific configuration for Solaris containers. Solaris *Solaris `json:"solaris,omitempty" platform:"solaris"` // Windows is platform specific configuration for Windows based containers, including Hyper-V containers. Windows *Windows `json:"windows,omitempty" platform:"windows"` } // Process contains information to start a specific application inside the container. type Process struct { // Terminal creates an interactive terminal for the container. Terminal bool `json:"terminal,omitempty"` // ConsoleSize specifies the size of the console. 
ConsoleSize Box `json:"consoleSize,omitempty"`
	// User specifies user information for the process.
	User User `json:"user"`
	// Args specifies the binary and arguments for the application to execute.
	Args []string `json:"args"`
	// Env populates the process environment for the process.
	Env []string `json:"env,omitempty"`
	// Cwd is the current working directory for the process and must be
	// relative to the container's root.
	Cwd string `json:"cwd"`
	// Capabilities are Linux capabilities that are kept for the container.
	Capabilities []string `json:"capabilities,omitempty" platform:"linux"`
	// Rlimits specifies rlimit options to apply to the process.
	Rlimits []Rlimit `json:"rlimits,omitempty" platform:"linux"`
	// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
	NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"`
	// ApparmorProfile specifies the apparmor profile for the container.
	ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"`
	// SelinuxLabel specifies the selinux context that the container process is run as.
	SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"`
}

// Box specifies dimensions of a rectangle. Used for specifying the size of a console.
type Box struct {
	// Height is the vertical dimension of a box.
	Height uint `json:"height"`
	// Width is the horizontal dimension of a box.
	Width uint `json:"width"`
}

// User specifies specific user (and group) information for the container process.
type User struct {
	// UID is the user id.
	UID uint32 `json:"uid" platform:"linux,solaris"`
	// GID is the group id.
	GID uint32 `json:"gid" platform:"linux,solaris"`
	// AdditionalGids are additional group ids set for the container's process.
	AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"`
	// Username is the user name.
	Username string `json:"username,omitempty" platform:"windows"`
}

// Root contains information about the container's root filesystem on the host.
type Root struct {
	// Path is the absolute path to the container's root filesystem.
	Path string `json:"path"`
	// Readonly makes the root filesystem for the container readonly before the process is executed.
	Readonly bool `json:"readonly,omitempty"`
}

// Platform specifies OS and arch information for the host system that the container
// is created for.
type Platform struct {
	// OS is the operating system.
	OS string `json:"os"`
	// Arch is the architecture.
	Arch string `json:"arch"`
}

// Mount specifies a mount for a container.
type Mount struct {
	// Destination is the path where the mount will be placed relative to the container's root. The path and child directories MUST exist, a runtime MUST NOT create directories automatically to a mount point.
	Destination string `json:"destination"`
	// Type specifies the mount kind.
	Type string `json:"type"`
	// Source specifies the source path of the mount. In the case of bind mounts on
	// Linux based systems this would be the file on the host.
	Source string `json:"source"`
	// Options are fstab style mount options.
	Options []string `json:"options,omitempty"`
}

// Hook specifies a command that is run at a particular event in the lifecycle of a container
type Hook struct {
	// Path is the path to the hook executable.
	Path string `json:"path"`
	// Args are the arguments passed to the hook.
	Args []string `json:"args,omitempty"`
	// Env is the environment for the hook process.
	Env []string `json:"env,omitempty"`
	// Timeout, if set, bounds how long the hook may run.
	Timeout *int `json:"timeout,omitempty"`
}

// Hooks for container setup and teardown
type Hooks struct {
	// Prestart is a list of hooks to be run before the container process is executed.
	// On Linux, they are run after the container namespaces are created.
	Prestart []Hook `json:"prestart,omitempty"`
	// Poststart is a list of hooks to be run after the container process is started.
	Poststart []Hook `json:"poststart,omitempty"`
	// Poststop is a list of hooks to be run after the container process exits.
	Poststop []Hook `json:"poststop,omitempty"`
}

// Linux contains platform specific configuration for Linux based containers.
type Linux struct {
	// UIDMapping specifies user mappings for supporting user namespaces on Linux.
	UIDMappings []IDMapping `json:"uidMappings,omitempty"`
	// GIDMapping specifies group mappings for supporting user namespaces on Linux.
	GIDMappings []IDMapping `json:"gidMappings,omitempty"`
	// Sysctl are a set of key value pairs that are set for the container on start
	Sysctl map[string]string `json:"sysctl,omitempty"`
	// Resources contain cgroup information for handling resource constraints
	// for the container
	Resources *Resources `json:"resources,omitempty"`
	// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
	// The path is expected to be relative to the cgroups mountpoint.
	// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
	CgroupsPath *string `json:"cgroupsPath,omitempty"`
	// Namespaces contains the namespaces that are created and/or joined by the container
	Namespaces []Namespace `json:"namespaces,omitempty"`
	// Devices are a list of device nodes that are created for the container
	Devices []Device `json:"devices,omitempty"`
	// Seccomp specifies the seccomp security settings for the container.
	Seccomp *Seccomp `json:"seccomp,omitempty"`
	// RootfsPropagation is the rootfs mount propagation mode for the container.
	RootfsPropagation string `json:"rootfsPropagation,omitempty"`
	// MaskedPaths masks over the provided paths inside the container.
	MaskedPaths []string `json:"maskedPaths,omitempty"`
	// ReadonlyPaths sets the provided paths as RO inside the container.
	ReadonlyPaths []string `json:"readonlyPaths,omitempty"`
	// MountLabel specifies the selinux context for the mounts in the container.
MountLabel string `json:"mountLabel,omitempty"` } // Namespace is the configuration for a Linux namespace type Namespace struct { // Type is the type of Linux namespace Type NamespaceType `json:"type"` // Path is a path to an existing namespace persisted on disk that can be joined // and is of the same type Path string `json:"path,omitempty"` } // NamespaceType is one of the Linux namespaces type NamespaceType string const ( // PIDNamespace for isolating process IDs PIDNamespace NamespaceType = "pid" // NetworkNamespace for isolating network devices, stacks, ports, etc NetworkNamespace = "network" // MountNamespace for isolating mount points MountNamespace = "mount" // IPCNamespace for isolating System V IPC, POSIX message queues IPCNamespace = "ipc" // UTSNamespace for isolating hostname and NIS domain name UTSNamespace = "uts" // UserNamespace for isolating user and group IDs UserNamespace = "user" // CgroupNamespace for isolating cgroup hierarchies CgroupNamespace = "cgroup" ) // IDMapping specifies UID/GID mappings type IDMapping struct { // HostID is the UID/GID of the host user or group HostID uint32 `json:"hostID"` // ContainerID is the UID/GID of the container's user or group ContainerID uint32 `json:"containerID"` // Size is the length of the range of IDs mapped between the two namespaces Size uint32 `json:"size"` } // Rlimit type and restrictions type Rlimit struct { // Type of the rlimit to set Type string `json:"type"` // Hard is the hard limit for the specified type Hard uint64 `json:"hard"` // Soft is the soft limit for the specified type Soft uint64 `json:"soft"` } // HugepageLimit structure corresponds to limiting kernel hugepages type HugepageLimit struct { // Pagesize is the hugepage size Pagesize *string `json:"pageSize,omitempty"` // Limit is the limit of "hugepagesize" hugetlb usage Limit *uint64 `json:"limit,omitempty"` } // InterfacePriority for network interfaces type InterfacePriority struct { // Name is the name of the network interface 
Name string `json:"name"` // Priority for the interface Priority uint32 `json:"priority"` } // blockIODevice holds major:minor format supported in blkio cgroup type blockIODevice struct { // Major is the device's major number. Major int64 `json:"major"` // Minor is the device's minor number. Minor int64 `json:"minor"` } // WeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice type WeightDevice struct { blockIODevice // Weight is the bandwidth rate for the device, range is from 10 to 1000 Weight *uint16 `json:"weight,omitempty"` // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only LeafWeight *uint16 `json:"leafWeight,omitempty"` } // ThrottleDevice struct holds a `major:minor rate_per_second` pair type ThrottleDevice struct { blockIODevice // Rate is the IO rate limit per cgroup per device Rate *uint64 `json:"rate,omitempty"` } // BlockIO for Linux cgroup 'blkio' resource management type BlockIO struct { // Specifies per cgroup weight, range is from 10 to 1000 Weight *uint16 `json:"blkioWeight,omitempty"` // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only LeafWeight *uint16 `json:"blkioLeafWeight,omitempty"` // Weight per cgroup per device, can override BlkioWeight WeightDevice []WeightDevice `json:"blkioWeightDevice,omitempty"` // IO read rate limit per cgroup per device, bytes per second ThrottleReadBpsDevice []ThrottleDevice `json:"blkioThrottleReadBpsDevice,omitempty"` // IO write rate limit per cgroup per device, bytes per second ThrottleWriteBpsDevice []ThrottleDevice `json:"blkioThrottleWriteBpsDevice,omitempty"` // IO read rate limit per cgroup per device, IO per second ThrottleReadIOPSDevice []ThrottleDevice `json:"blkioThrottleReadIOPSDevice,omitempty"` // IO write rate limit per cgroup per device, IO per second ThrottleWriteIOPSDevice []ThrottleDevice 
`json:"blkioThrottleWriteIOPSDevice,omitempty"` } // Memory for Linux cgroup 'memory' resource management type Memory struct { // Memory limit (in bytes). Limit *uint64 `json:"limit,omitempty"` // Memory reservation or soft_limit (in bytes). Reservation *uint64 `json:"reservation,omitempty"` // Total memory limit (memory + swap). Swap *uint64 `json:"swap,omitempty"` // Kernel memory limit (in bytes). Kernel *uint64 `json:"kernel,omitempty"` // Kernel memory limit for tcp (in bytes) KernelTCP *uint64 `json:"kernelTCP,omitempty"` // How aggressive the kernel will swap memory pages. Range from 0 to 100. Swappiness *uint64 `json:"swappiness,omitempty"` } // CPU for Linux cgroup 'cpu' resource management type CPU struct { // CPU shares (relative weight (ratio) vs. other cgroups with cpu shares). Shares *uint64 `json:"shares,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. Quota *uint64 `json:"quota,omitempty"` // CPU period to be used for hardcapping (in usecs). Period *uint64 `json:"period,omitempty"` // How much time realtime scheduling may use (in usecs). RealtimeRuntime *uint64 `json:"realtimeRuntime,omitempty"` // CPU period to be used for realtime scheduling (in usecs). RealtimePeriod *uint64 `json:"realtimePeriod,omitempty"` // CPUs to use within the cpuset. Default is to use any CPU available. Cpus *string `json:"cpus,omitempty"` // List of memory nodes in the cpuset. Default is to use any available memory node. Mems *string `json:"mems,omitempty"` } // Pids for Linux cgroup 'pids' resource management (Linux 4.3) type Pids struct { // Maximum number of PIDs. Default is "no limit". 
Limit *int64 `json:"limit,omitempty"` } // Network identification and priority configuration type Network struct { // Set class identifier for container's network packets ClassID *uint32 `json:"classID,omitempty"` // Set priority of network traffic for container Priorities []InterfacePriority `json:"priorities,omitempty"` } // Resources has container runtime resource constraints type Resources struct { // Devices configures the device whitelist. Devices []DeviceCgroup `json:"devices,omitempty"` // DisableOOMKiller disables the OOM killer for out of memory conditions DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"` // Specify an oom_score_adj for the container. OOMScoreAdj *int `json:"oomScoreAdj,omitempty"` // Memory restriction configuration Memory *Memory `json:"memory,omitempty"` // CPU resource restriction configuration CPU *CPU `json:"cpu,omitempty"` // Task resource restriction configuration. Pids *Pids `json:"pids,omitempty"` // BlockIO restriction configuration BlockIO *BlockIO `json:"blockIO,omitempty"` // Hugetlb limit (in bytes) HugepageLimits []HugepageLimit `json:"hugepageLimits,omitempty"` // Network restriction configuration Network *Network `json:"network,omitempty"` } // Device represents the mknod information for a Linux special device file type Device struct { // Path to the device. Path string `json:"path"` // Device type, block, char, etc. Type string `json:"type"` // Major is the device's major number. Major int64 `json:"major"` // Minor is the device's minor number. Minor int64 `json:"minor"` // FileMode permission bits for the device. FileMode *os.FileMode `json:"fileMode,omitempty"` // UID of the device. UID *uint32 `json:"uid,omitempty"` // Gid of the device. GID *uint32 `json:"gid,omitempty"` } // DeviceCgroup represents a device rule for the whitelist controller type DeviceCgroup struct { // Allow or deny Allow bool `json:"allow"` // Device type, block, char, etc. 
Type *string `json:"type,omitempty"` // Major is the device's major number. Major *int64 `json:"major,omitempty"` // Minor is the device's minor number. Minor *int64 `json:"minor,omitempty"` // Cgroup access permissions format, rwm. Access *string `json:"access,omitempty"` } // Seccomp represents syscall restrictions type Seccomp struct { DefaultAction Action `json:"defaultAction"` Architectures []Arch `json:"architectures"` Syscalls []Syscall `json:"syscalls,omitempty"` } // Solaris contains platform specific configuration for Solaris application containers. type Solaris struct { // SMF FMRI which should go "online" before we start the container process. Milestone string `json:"milestone,omitempty"` // Maximum set of privileges any process in this container can obtain. LimitPriv string `json:"limitpriv,omitempty"` // The maximum amount of shared memory allowed for this container. MaxShmMemory string `json:"maxShmMemory,omitempty"` // Specification for automatic creation of network resources for this container. Anet []Anet `json:"anet,omitempty"` // Set limit on the amount of CPU time that can be used by container. CappedCPU *CappedCPU `json:"cappedCPU,omitempty"` // The physical and swap caps on the memory that can be used by this container. CappedMemory *CappedMemory `json:"cappedMemory,omitempty"` } // CappedCPU allows users to set limit on the amount of CPU time that can be used by container. type CappedCPU struct { Ncpus string `json:"ncpus,omitempty"` } // CappedMemory allows users to set the physical and swap caps on the memory that can be used by this container. type CappedMemory struct { Physical string `json:"physical,omitempty"` Swap string `json:"swap,omitempty"` } // Anet provides the specification for automatic creation of network resources for this container. type Anet struct { // Specify a name for the automatically created VNIC datalink. Linkname string `json:"linkname,omitempty"` // Specify the link over which the VNIC will be created. 
Lowerlink string `json:"lowerLink,omitempty"` // The set of IP addresses that the container can use. Allowedaddr string `json:"allowedAddress,omitempty"` // Specifies whether allowedAddress limitation is to be applied to the VNIC. Configallowedaddr string `json:"configureAllowedAddress,omitempty"` // The value of the optional default router. Defrouter string `json:"defrouter,omitempty"` // Enable one or more types of link protection. Linkprotection string `json:"linkProtection,omitempty"` // Set the VNIC's macAddress Macaddress string `json:"macAddress,omitempty"` } // Windows defines the runtime configuration for Windows based containers, including Hyper-V containers. type Windows struct { // Resources contains information for handling resource constraints for the container. Resources *WindowsResources `json:"resources,omitempty"` } // WindowsResources has container runtime resource constraints for containers running on Windows. type WindowsResources struct { // Memory restriction configuration. Memory *WindowsMemoryResources `json:"memory,omitempty"` // CPU resource restriction configuration. CPU *WindowsCPUResources `json:"cpu,omitempty"` // Storage restriction configuration. Storage *WindowsStorageResources `json:"storage,omitempty"` // Network restriction configuration. Network *WindowsNetworkResources `json:"network,omitempty"` } // WindowsMemoryResources contains memory resource management settings. type WindowsMemoryResources struct { // Memory limit in bytes. Limit *uint64 `json:"limit,omitempty"` // Memory reservation in bytes. Reservation *uint64 `json:"reservation,omitempty"` } // WindowsCPUResources contains CPU resource management settings. type WindowsCPUResources struct { // Number of CPUs available to the container. Count *uint64 `json:"count,omitempty"` // CPU shares (relative weight to other containers with cpu shares). Range is from 1 to 10000. Shares *uint16 `json:"shares,omitempty"` // Percent of available CPUs usable by the container. 
Percent *uint8 `json:"percent,omitempty"` } // WindowsStorageResources contains storage resource management settings. type WindowsStorageResources struct { // Specifies maximum Iops for the system drive. Iops *uint64 `json:"iops,omitempty"` // Specifies maximum bytes per second for the system drive. Bps *uint64 `json:"bps,omitempty"` // Sandbox size specifies the minimum size of the system drive in bytes. SandboxSize *uint64 `json:"sandboxSize,omitempty"` } // WindowsNetworkResources contains network resource management settings. type WindowsNetworkResources struct { // EgressBandwidth is the maximum egress bandwidth in bytes per second. EgressBandwidth *uint64 `json:"egressBandwidth,omitempty"` } // Arch used for additional architectures type Arch string // Additional architectures permitted to be used for system calls // By default only the native architecture of the kernel is permitted const ( ArchX86 Arch = "SCMP_ARCH_X86" ArchX86_64 Arch = "SCMP_ARCH_X86_64" ArchX32 Arch = "SCMP_ARCH_X32" ArchARM Arch = "SCMP_ARCH_ARM" ArchAARCH64 Arch = "SCMP_ARCH_AARCH64" ArchMIPS Arch = "SCMP_ARCH_MIPS" ArchMIPS64 Arch = "SCMP_ARCH_MIPS64" ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32" ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL" ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64" ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32" ArchPPC Arch = "SCMP_ARCH_PPC" ArchPPC64 Arch = "SCMP_ARCH_PPC64" ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE" ArchS390 Arch = "SCMP_ARCH_S390" ArchS390X Arch = "SCMP_ARCH_S390X" ) // Action taken upon Seccomp rule match type Action string // Define actions for Seccomp rules const ( ActKill Action = "SCMP_ACT_KILL" ActTrap Action = "SCMP_ACT_TRAP" ActErrno Action = "SCMP_ACT_ERRNO" ActTrace Action = "SCMP_ACT_TRACE" ActAllow Action = "SCMP_ACT_ALLOW" ) // Operator used to match syscall arguments in Seccomp type Operator string // Define operators for syscall arguments in Seccomp const ( OpNotEqual Operator = "SCMP_CMP_NE" OpLessThan Operator = "SCMP_CMP_LT" OpLessEqual Operator = 
"SCMP_CMP_LE" OpEqualTo Operator = "SCMP_CMP_EQ" OpGreaterEqual Operator = "SCMP_CMP_GE" OpGreaterThan Operator = "SCMP_CMP_GT" OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ" ) // Arg used for matching specific syscall arguments in Seccomp type Arg struct { Index uint `json:"index"` Value uint64 `json:"value"` ValueTwo uint64 `json:"valueTwo"` Op Operator `json:"op"` } // Syscall is used to match a syscall in Seccomp type Syscall struct { Name string `json:"name"` Action Action `json:"action"` Args []Arg `json:"args,omitempty"` } state.go000066400000000000000000000011721304443252500351730ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/specs-gopackage specs // State holds information about the runtime state of the container. type State struct { // Version is the version of the specification that is supported. Version string `json:"version"` // ID is the container ID ID string `json:"id"` // Status is the runtime state of the container. Status string `json:"status"` // Pid is the process ID for the container process. Pid int `json:"pid"` // BundlePath is the path to the container's bundle directory. BundlePath string `json:"bundlePath"` // Annotations are the annotations associated with the container. Annotations map[string]string `json:"annotations"` } version.go000066400000000000000000000010341304443252500355350ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/specs-gopackage specs import "fmt" const ( // VersionMajor is for an API incompatible changes VersionMajor = 1 // VersionMinor is for functionality in a backwards-compatible manner VersionMinor = 0 // VersionPatch is for backwards-compatible bug fixes VersionPatch = 0 // VersionDev indicates development branch. Releases will be empty string. VersionDev = "-rc2-dev" ) // Version is the specification version that the package types support. 
var Version = fmt.Sprintf("%d.%d.%d%s", VersionMajor, VersionMinor, VersionPatch, VersionDev) docker-runc-tags-docker-1.13.1/LICENSE000066400000000000000000000250061304443252500172240ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2014 Docker, Inc. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. docker-runc-tags-docker-1.13.1/MAINTAINERS000066400000000000000000000006431304443252500177140ustar00rootroot00000000000000Michael Crosby (@crosbymichael) Rohit Jnagal (@rjnagal) Victor Marmol (@vmarmol) Mrunal Patel (@mrunalp) Alexander Morozov (@LK4D4) Daniel, Dao Quang Minh (@dqminh) Andrey Vagin (@avagin) Qiang Huang (@hqhq) Aleksa Sarai (@cyphar) docker-runc-tags-docker-1.13.1/MAINTAINERS_GUIDE.md000066400000000000000000000127341304443252500212340ustar00rootroot00000000000000## Introduction Dear maintainer. Thank you for investing the time and energy to help make runc as useful as possible. Maintaining a project is difficult, sometimes unrewarding work. Sure, you will get to contribute cool features to the project. But most of your time will be spent reviewing, cleaning up, documenting, answering questions, justifying design decisions - while everyone has all the fun! But remember - the quality of the maintainers work is what distinguishes the good projects from the great. So please be proud of your work, even the unglamourous parts, and encourage a culture of appreciation and respect for *every* aspect of improving the project - not just the hot new features. This document is a manual for maintainers old and new. It explains what is expected of maintainers, how they should work, and what tools are available to them. This is a living document - if you see something out of date or missing, speak up! ## What are a maintainer's responsibility? 
It is every maintainer's responsibility to: * 1) Expose a clear roadmap for improving their component. * 2) Deliver prompt feedback and decisions on pull requests. * 3) Be available to anyone with questions, bug reports, criticism etc. on their component. This includes IRC and GitHub issues and pull requests. * 4) Make sure their component respects the philosophy, design and roadmap of the project. ## How are decisions made? Short answer: with pull requests to the runc repository. runc is an open-source project with an open design philosophy. This means that the repository is the source of truth for EVERY aspect of the project, including its philosophy, design, roadmap and APIs. *If it's part of the project, it's in the repo. It's in the repo, it's part of the project.* As a result, all decisions can be expressed as changes to the repository. An implementation change is a change to the source code. An API change is a change to the API specification. A philosophy change is a change to the philosophy manifesto. And so on. All decisions affecting runc, big and small, follow the same 3 steps: * Step 1: Open a pull request. Anyone can do this. * Step 2: Discuss the pull request. Anyone can do this. * Step 3: Accept (`LGTM`) or refuse a pull request. The relevant maintainers do this (see below "Who decides what?") ### I'm a maintainer, should I make pull requests too? Yes. Nobody should ever push to master directly. All changes should be made through a pull request. ## Who decides what? All decisions are pull requests, and the relevant maintainers make decisions by accepting or refusing the pull request. Review and acceptance by anyone is denoted by adding a comment in the pull request: `LGTM`. However, only currently listed `MAINTAINERS` are counted towards the required two LGTMs. Overall the maintainer system works because of mutual respect across the maintainers of the project. The maintainers trust one another to make decisions in the best interests of the project. 
Sometimes maintainers can disagree and this is part of a healthy project to represent the point of views of various people. In the case where maintainers cannot find agreement on a specific change the role of a Chief Maintainer comes into play. The Chief Maintainer for the project is responsible for overall architecture of the project to maintain conceptual integrity. Large decisions and architecture changes should be reviewed by the chief maintainer. The current chief maintainer for the project is Michael Crosby (@crosbymichael). Even though the maintainer system is built on trust, if there is a conflict with the chief maintainer on a decision, their decision can be challenged and brought to the technical oversight board if two-thirds of the maintainers vote for an appeal. It is expected that this would be a very exceptional event. ### How are maintainers added? The best maintainers have a vested interest in the project. Maintainers are first and foremost contributors that have shown they are committed to the long term success of the project. Contributors wanting to become maintainers are expected to be deeply involved in contributing code, pull request review, and triage of issues in the project for more than two months. Just contributing does not make you a maintainer, it is about building trust with the current maintainers of the project and being a person that they can depend on and trust to make decisions in the best interest of the project. The final vote to add a new maintainer should be approved by over 66% of the current maintainers with the chief maintainer having veto power. In case of a veto, conflict resolution rules expressed above apply. The voting period is five business days on the Pull Request to add the new maintainer. ### What is expected of maintainers? Part of a healthy project is to have active maintainers to support the community in contributions and perform tasks to keep the project running. 
Maintainers are expected to be able to respond in a timely manner if their help is required on specific issues where they are pinged. Being a maintainer is a time consuming commitment and should not be taken lightly. When a maintainer is unable to perform the required duties they can be removed with a vote by 66% of the current maintainers with the chief maintainer having veto power. The voting period is ten business days. Issues related to a maintainer's performance should be discussed with them among the other maintainers so that they are not surprised by a pull request removing them. docker-runc-tags-docker-1.13.1/Makefile000066400000000000000000000066351304443252500176660ustar00rootroot00000000000000.PHONY: dbuild man \ localtest localunittest localintegration \ test unittest integration PREFIX := $(DESTDIR)/usr/local BINDIR := $(PREFIX)/sbin GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null) GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g") RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN)) PROJECT := github.com/opencontainers/runc TEST_DOCKERFILE := script/test_Dockerfile BUILDTAGS := seccomp COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true) COMMIT := $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}") RUNC_LINK := $(CURDIR)/Godeps/_workspace/src/github.com/opencontainers/runc export GOPATH := $(CURDIR)/Godeps/_workspace MAN_DIR := $(CURDIR)/man/man8 MAN_PAGES = $(shell ls $(MAN_DIR)/*.8) MAN_PAGES_BASE = $(notdir $(MAN_PAGES)) MAN_INSTALL_PATH := ${PREFIX}/share/man/man8/ RELEASE_DIR := $(CURDIR)/release VERSION := ${shell cat ./VERSION} SHELL := $(shell command -v bash 2>/dev/null) all: $(RUNC_LINK) go build -i -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -tags "$(BUILDTAGS)" -o runc . 
static: $(RUNC_LINK) CGO_ENABLED=1 go build -i -tags "$(BUILDTAGS) cgo static_build" -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -o runc . release: $(RUNC_LINK) @flag_list=(seccomp selinux apparmor static ambient); \ unset expression; \ for flag in "$${flag_list[@]}"; do \ expression+="' '{'',$${flag}}"; \ done; \ eval profile_list=("$$expression"); \ for profile in "$${profile_list[@]}"; do \ output=${RELEASE_DIR}/runc; \ for flag in $$profile; do \ output+=."$$flag"; \ done; \ tags="$$profile"; \ ldflags="-X main.gitCommit=${COMMIT} -X main.version=${VERSION}"; \ CGO_ENABLED=; \ [[ "$$profile" =~ static ]] && { \ tags="$${tags/static/static_build}"; \ tags+=" cgo"; \ ldflags+=" -w -extldflags -static"; \ CGO_ENABLED=1; \ }; \ echo "Building target: $$output"; \ rm -rf "${GOPATH}/pkg"; \ go build -i -ldflags "$$ldflags" -tags "$$tags" -o "$$output" .; \ done $(RUNC_LINK): ln -sfn $(CURDIR) $(RUNC_LINK) dbuild: runcimage docker run --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make lint: go vet ./... go fmt ./... man: man/md2man-all.sh runcimage: docker build -t $(RUNC_IMAGE) . test: make unittest integration localtest: make localunittest localintegration unittest: runcimage docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest localunittest: all go test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v ./... 
integration: runcimage docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration localintegration: all bats -t tests/integration${TESTFLAGS} install: install -D -m0755 runc $(BINDIR)/runc install-bash: install -D -m0644 contrib/completions/bash/runc $(PREFIX)/share/bash-completion/completions/runc install-man: install -d -m 755 $(MAN_INSTALL_PATH) install -m 644 $(MAN_PAGES) $(MAN_INSTALL_PATH) uninstall: rm -f $(BINDIR)/runc uninstall-bash: rm -f $(PREFIX)/share/bash-completion/completions/runc uninstall-man: rm -f $(addprefix $(MAN_INSTALL_PATH),$(MAN_PAGES_BASE)) clean: rm -f runc rm -f $(RUNC_LINK) rm -rf $(GOPATH)/pkg rm -rf $(RELEASE_DIR) validate: script/validate-gofmt go vet ./... ci: validate localtest docker-runc-tags-docker-1.13.1/NOTICE000066400000000000000000000010061304443252500171150ustar00rootroot00000000000000runc Copyright 2012-2015 Docker, Inc. This product includes software developed at Docker, Inc. (http://www.docker.com). The following is courtesy of our legal counsel: Use and transfer of Docker may be subject to certain restrictions by the United States and other governments. It is your responsibility to ensure that your use and/or transfer does not violate applicable laws. For more information, please see http://www.bis.doc.gov See also http://www.apache.org/dev/crypto.html and/or seek legal counsel. docker-runc-tags-docker-1.13.1/PRINCIPLES.md000066400000000000000000000021631304443252500202100ustar00rootroot00000000000000# runc principles In the design and development of runc and libcontainer we try to follow these principles: (Work in progress) * Don't try to replace every tool. Instead, be an ingredient to improve them. * Less code is better. * Fewer components are better. Do you really need to add one more class? * 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand. * Don't do later what you can do now. 
"//TODO: refactor" is not acceptable in new code. * When hesitating between two options, choose the one that is easier to reverse. * "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later. * Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable. * The fewer moving parts in a container, the better. * Don't merge it unless you document it. * Don't document it unless you can keep it up-to-date. * Don't merge it unless you test it! * Everyone's problem is slightly different. Focus on the part that is the same for everyone, and solve that. docker-runc-tags-docker-1.13.1/README.md000066400000000000000000000144521304443252500175010ustar00rootroot00000000000000[![Build Status](https://jenkins.dockerproject.org/buildStatus/icon?job=runc Master)](https://jenkins.dockerproject.org/job/runc Master) ## runc `runc` is a CLI tool for spawning and running containers according to the OCI specification. ## Releases `runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository. We will try to make sure that `runc` and the OCI specification major versions stay in lockstep. This means that `runc` 1.0.0 should implement the 1.0 version of the specification. You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page. ## Building `runc` currently supports the Linux platform with various architecture support. It must be built with Go version 1.6 or higher in order for some features to function properly. In order to enable seccomp support you will need to install `libseccomp` on your platform. > e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make. 
```bash # create a 'github.com/opencontainers' in your GOPATH/src cd github.com/opencontainers git clone https://github.com/opencontainers/runc cd runc make sudo make install ``` `runc` will be installed to `/usr/local/sbin/runc` on your system. #### Build Tags `runc` supports optional build tags for compiling support of various features. To add build tags to the make option the `BUILDTAGS` variable must be set. ```bash make BUILDTAGS='seccomp apparmor' ``` | Build Tag | Feature | Dependency | |-----------|------------------------------------|-------------| | seccomp | Syscall filtering | libseccomp | | selinux | selinux process and mount labeling | | | apparmor | apparmor profile support | libapparmor | | ambient | ambient capability support | kernel 4.3 | ### Running the test suite `runc` currently supports running its test suite via Docker. To run the suite just type `make test`. ```bash make test ``` There are additional make targets for running the tests outside of a container but this is not recommended as the tests are written with the expectation that they can write and remove anywhere. You can run a specific test case by setting the `TESTFLAGS` variable. ```bash # make test TESTFLAGS="-run=SomeTestFunction" ``` ## Using runc ### Creating an OCI Bundle In order to use runc you must have your container in the format of an OCI bundle. If you have Docker installed you can use its `export` method to acquire a root filesystem from an existing Docker container. ```bash # create the top most bundle directory mkdir /mycontainer cd /mycontainer # create the rootfs directory mkdir rootfs # export busybox via Docker into the rootfs directory docker export $(docker create busybox) | tar -C rootfs -xvf - ``` After a root filesystem is populated you just generate a spec in the format of a `config.json` file inside your bundle. `runc` provides a `spec` command to generate a base template spec that you are then able to edit. 
To find features and documentation for fields in the spec please refer to the [specs](https://github.com/opencontainers/runtime-spec) repository. ```bash runc spec ``` ### Running Containers Assuming you have an OCI bundle from the previous step you can execute the container in two different ways. The first way is to use the convenience command `run` that will handle creating, starting, and deleting the container after it exits. ```bash cd /mycontainer runc run mycontainerid ``` If you used the unmodified `runc spec` template this should give you a `sh` session inside the container. The second way to start a container is using the specs lifecycle operations. This gives you more power over how the container is created and managed while it is running. This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here. Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`. ```json "process": { "terminal": false, "user": { "uid": 0, "gid": 0 }, "args": [ "sleep", "5" ], "env": [ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm" ], "cwd": "/", "capabilities": [ "CAP_AUDIT_WRITE", "CAP_KILL", "CAP_NET_BIND_SERVICE" ], "rlimits": [ { "type": "RLIMIT_NOFILE", "hard": 1024, "soft": 1024 } ], "noNewPrivileges": true }, ``` Now we can go though the lifecycle operations in your shell. 
```bash cd /mycontainer runc create mycontainerid # view the container is created and in the "created" state runc list # start the process inside the container runc start mycontainerid # after 5 seconds view that the container has exited and is now in the stopped state runc list # now delete the container runc delete mycontainerid ``` This adds more complexity but allows higher level systems to manage runc and provides points in the containers creation to setup various settings after the container has created and/or before it is deleted. This is commonly used to setup the container's network stack after `create` but before `start` where the user's defined process will be running. #### Supervisors `runc` can be used with process supervisors and init systems to ensure that containers are restarted when they exit. An example systemd unit file looks something like this. ```systemd [Unit] Description=Start My Container [Service] Type=forking ExecStart=/usr/local/sbin/runc run -d --pid-file /run/mycontainerid.pid mycontainerid ExecStopPost=/usr/local/sbin/runc delete mycontainerid WorkingDirectory=/mycontainer PIDFile=/run/mycontainerid.pid [Install] WantedBy=multi-user.target ``` docker-runc-tags-docker-1.13.1/VERSION000066400000000000000000000000121304443252500172550ustar00rootroot000000000000001.0.0-rc2 docker-runc-tags-docker-1.13.1/checkpoint.go000066400000000000000000000073551304443252500207040ustar00rootroot00000000000000// +build linux package main import ( "fmt" "strconv" "strings" "syscall" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) var checkpointCommand = cli.Command{ Name: "checkpoint", Usage: "checkpoint a running container", ArgsUsage: ` Where "" is the name for the instance of the container to be checkpointed.`, Description: `The checkpoint command saves the state of the container instance.`, Flags: []cli.Flag{ cli.StringFlag{Name: "image-path", Value: "", Usage: "path for 
saving criu image files"}, cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"}, cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"}, cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"}, cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"}, cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"}, cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"}, cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"}, cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"}, cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properies"}, }, Action: func(context *cli.Context) error { container, err := getContainer(context) if err != nil { return err } status, err := container.Status() if err != nil { return err } if status == libcontainer.Created { fatalf("Container cannot be checkpointed in created state") } defer destroy(container) options := criuOptions(context) // these are the mandatory criu options for a container setPageServer(context, options) setManageCgroupsMode(context, options) if err := setEmptyNsMask(context, options); err != nil { return err } if err := container.Checkpoint(options); err != nil { return err } return nil }, } func getCheckpointImagePath(context *cli.Context) string { imagePath := context.String("image-path") if imagePath == "" { imagePath = getDefaultImagePath(context) } return imagePath } func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) { // xxx following criu opts are optional // The dump image can be sent to a criu page server if psOpt := context.String("page-server"); psOpt != "" { addressPort := strings.Split(psOpt, ":") if len(addressPort) != 2 { fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server")) } portInt, err := 
strconv.Atoi(addressPort[1]) if err != nil { fatal(fmt.Errorf("Invalid port number")) } options.PageServer = libcontainer.CriuPageServerInfo{ Address: addressPort[0], Port: int32(portInt), } } } func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) { if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" { switch cgOpt { case "soft": options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT case "full": options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL case "strict": options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT default: fatal(fmt.Errorf("Invalid manage cgroups mode")) } } } var namespaceMapping = map[specs.NamespaceType]int{ specs.NetworkNamespace: syscall.CLONE_NEWNET, } func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error { var nsmask int for _, ns := range context.StringSlice("empty-ns") { f, exists := namespaceMapping[specs.NamespaceType(ns)] if !exists { return fmt.Errorf("namespace %q is not supported", ns) } nsmask |= f } options.EmptyNs = uint32(nsmask) return nil } docker-runc-tags-docker-1.13.1/contrib/000077500000000000000000000000001304443252500176545ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/contrib/completions/000077500000000000000000000000001304443252500222105ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/contrib/completions/bash/000077500000000000000000000000001304443252500231255ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/contrib/completions/bash/runc000066400000000000000000000334321304443252500240240ustar00rootroot00000000000000#!/bin/bash # # bash completion file for runc command # # This script provides completion of: # - commands and their options # - filepaths # # To enable the completions either: # - place this file in /usr/share/bash-completion/completions # or # - copy this file to e.g. ~/.runc-completion.sh and add the line # below to your .bashrc after bash completion features are loaded # . 
~/.runc-completion.sh # # Configuration: # # Note for developers: # Please arrange options sorted alphabetically by long name with the short # options immediately following their corresponding long form. # This order should be applied to lists, alternatives and code blocks. __runc_previous_extglob_setting=$(shopt -p extglob) shopt -s extglob __runc_list_all() { COMPREPLY=( $( compgen -W "$(runc list -q)" -- $cur) ) } __runc_pos_first_nonflag() { local argument_flags=$1 local counter=$((${subcommand_pos:-${command_pos}} + 1)) while [ $counter -le $cword ]; do if [ -n "$argument_flags" ] && eval "case '${words[$counter]}' in $argument_flags) true ;; *) false ;; esac"; then (( counter++ )) else case "${words[$counter]}" in -*) ;; *) break ;; esac fi (( counter++ )) done echo $counter } # Transforms a multiline list of strings into a single line string # with the words separated by "|". # This is used to prepare arguments to __runc_pos_first_nonflag(). __runc_to_alternatives() { local parts=( $1 ) local IFS='|' echo "${parts[*]}" } # Transforms a multiline list of options into an extglob pattern # suitable for use in case statements. __runc_to_extglob() { local extglob=$( __runc_to_alternatives "$1" ) echo "@($extglob)" } # Subcommand processing. # Locates the first occurrence of any of the subcommands contained in the # first argument. In case of a match, calls the corresponding completion # function and returns 0. # If no match is found, 1 is returned. The calling function can then # continue processing its completion. # # TODO if the preceding command has options that accept arguments and an # argument is equal ot one of the subcommands, this is falsely detected as # a match. 
__runc_subcommands() { local subcommands="$1" local counter=$(($command_pos + 1)) while [ $counter -lt $cword ]; do case "${words[$counter]}" in $(__runc_to_extglob "$subcommands") ) subcommand_pos=$counter local subcommand=${words[$counter]} local completions_func=_runc_${command}_${subcommand} declare -F $completions_func >/dev/null && $completions_func return 0 ;; esac (( counter++ )) done return 1 } # List all Signals __runc_list_signals() { COMPREPLY=( $( compgen -W "$(for i in $(kill -l | xargs); do echo $i; done | grep SIG)")) } # suppress trailing whitespace __runc_nospace() { # compopt is not available in ancient bash versions type compopt &>/dev/null && compopt -o nospace } # The list of capabilities is defined in types.go, ALL was added manually. __runc_complete_capabilities() { COMPREPLY=( $( compgen -W " ALL AUDIT_CONTROL AUDIT_WRITE AUDIT_READ BLOCK_SUSPEND CHOWN DAC_OVERRIDE DAC_READ_SEARCH FOWNER FSETID IPC_LOCK IPC_OWNER KILL LEASE LINUX_IMMUTABLE MAC_ADMIN MAC_OVERRIDE MKNOD NET_ADMIN NET_BIND_SERVICE NET_BROADCAST NET_RAW SETFCAP SETGID SETPCAP SETUID SYS_ADMIN SYS_BOOT SYS_CHROOT SYSLOG SYS_MODULE SYS_NICE SYS_PACCT SYS_PTRACE SYS_RAWIO SYS_RESOURCE SYS_TIME SYS_TTY_CONFIG WAKE_ALARM " -- "$cur" ) ) } _runc_exec() { local boolean_options=" --help --no-new-privs --tty, -t --detach, -d " local options_with_args=" --console --cwd --env, -e --user, -u --process, -p --pid-file --process-label --apparmor --cap, -c " local all_options="$options_with_args $boolean_options" case "$prev" in --cap|-c) __runc_complete_capabilities return ;; --console|--cwd|--process|--apparmor) case "$cur" in *:*) # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine) ;; '') COMPREPLY=( $( compgen -W '/' -- "$cur" ) ) __runc_nospace ;; /*) _filedir __runc_nospace ;; esac return ;; --env|-e) COMPREPLY=( $( compgen -e -- "$cur" ) ) __runc_nospace return ;; $(__runc_to_extglob "$options_with_args") ) 
return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$all_options" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } # global options that may appear after the runc command _runc_runc() { local boolean_options=" $global_boolean_options --help --version -v --debug " local options_with_args=" --log --log-format --root --criu " case "$prev" in --log|--root|--criu) case "$cur" in *:*) # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine) ;; '') COMPREPLY=( $( compgen -W '/' -- "$cur" ) ) __runc_nospace ;; *) _filedir __runc_nospace ;; esac return ;; --log-format) COMPREPLY=( $( compgen -W 'text json' -- "$cur" ) ) return ;; $(__runc_to_extglob "$options_with_args") ) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) local counter=$( __runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args") ) if [ $cword -eq $counter ]; then COMPREPLY=( $( compgen -W "${commands[*]} help" -- "$cur" ) ) fi ;; esac } _runc_pause() { local boolean_options=" --help -h " case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_ps() { local boolean_options=" --help -h " case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_delete() { local boolean_options=" --help -h " case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_kill() { local boolean_options=" --help -h --all -a " case "$prev" in "kill") __runc_list_all return ;; *) __runc_list_signals return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_events() { local boolean_options=" --help --stats " local options_with_args=" --interval " case "$prev" in 
$(__runc_to_extglob "$options_with_args")) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_list() { local boolean_options=" --help --quiet -q " local options_with_args=" --format -f " case "$prev" in --format|-f) COMPREPLY=( $( compgen -W 'text json' -- "$cur" ) ) return ;; $(__runc_to_extglob "$options_with_args")) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) local counter=$( __runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args") ) ;; esac } _runc_spec() { local boolean_options=" --help " local options_with_args=" --bundle -b " case "$prev" in --bundle|-b) case "$cur" in '') COMPREPLY=( $( compgen -W '/' -- "$cur" ) ) __runc_nospace ;; /*) _filedir __runc_nospace ;; esac return ;; $(__runc_to_extglob "$options_with_args")) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) local counter=$( __runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args") ) ;; esac } _runc_run() { local boolean_options=" --help --detatch -d --no-subreaper --no-pivot --no-new-keyring " local options_with_args=" --bundle -b --console --pid-file " case "$prev" in --bundle|-b|--console|--pid-file) case "$cur" in '') COMPREPLY=( $( compgen -W '/' -- "$cur" ) ) __runc_nospace ;; /*) _filedir __runc_nospace ;; esac return ;; $(__runc_to_extglob "$options_with_args")) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_checkpoint() { local boolean_options=" --help -h --leave-running --tcp-established --ext-unix-sk --shell-job --file-locks " local options_with_args=" --image-path --work-path --page-server --manage-cgroups-mode " case "$prev" in --page-server) ;; --manage-cgroups-mode) COMPREPLY=( $( compgen -W "soft full strict" -- "$cur" ) ) 
return ;; --image-path|--work-path) case "$cur" in *:*) # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine) ;; '') COMPREPLY=( $( compgen -W '/' -- "$cur" ) ) __runc_nospace ;; *) _filedir __runc_nospace ;; esac return ;; $(__runc_to_extglob "$options_with_args")) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_create() { local boolean_options=" --help --no-pivot --no-new-keyring " local options_with_args=" --bundle -b --console --pid-file " case "$prev" in --bundle|-b|--console|--pid-file) case "$cur" in '') COMPREPLY=( $( compgen -W '/' -- "$cur" ) ) __runc_nospace ;; /*) _filedir __runc_nospace ;; esac return ;; $(__runc_to_extglob "$options_with_args")) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_help() { local counter=$(__runc_pos_first_nonflag) if [ $cword -eq $counter ]; then COMPREPLY=( $( compgen -W "${commands[*]}" -- "$cur" ) ) fi } _runc_restore() { local boolean_options=" --help --tcp-established --ext-unix-sk --shell-job --file-locks --detach -d --no-subreaper --no-pivot " local options_with_args=" -b --bundle --image-path --work-path --manage-cgroups-mode --pid-file " local all_options="$options_with_args $boolean_options" case "$prev" in --manage-cgroups-mode) COMPREPLY=( $( compgen -W "soft full strict" -- "$cur" ) ) return ;; --pid-file|--image-path|--work-path|--bundle|-b) case "$cur" in *:*) # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine) ;; '') COMPREPLY=( $( compgen -W '/' -- "$cur" ) ) __runc_nospace ;; /*) _filedir __runc_nospace ;; esac return ;; $(__runc_to_extglob "$options_with_args") ) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$all_options" -- "$cur" ) ) ;; 
*) __runc_list_all ;; esac } _runc_resume() { local boolean_options=" --help -h " case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_state() { local boolean_options=" --help -h " case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_start() { local boolean_options=" --help -h " case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc_update() { local boolean_options=" --help " local options_with_args=" --blkio-weight --cpu-period --cpu-quota --cpu-rt-period --cpu-rt-runtime --cpu-share --cpuset-cpus --cpuset-mems --kernel-memory --kernel-memory-tcp --memory --memory-reservation --memory-swap " case "$prev" in $(__runc_to_extglob "$options_with_args")) return ;; esac case "$cur" in -*) COMPREPLY=( $( compgen -W "$boolean_options $options_with_args" -- "$cur" ) ) ;; *) __runc_list_all ;; esac } _runc() { local previous_extglob_setting=$(shopt -p extglob) shopt -s extglob local commands=( checkpoint create delete events exec init kill list pause ps restore resume run spec start state update help h ) # These options are valid as global options for all client commands # and valid as command options for `runc daemon` local global_boolean_options=" --help -h --version -v " COMPREPLY=() local cur prev words cword _get_comp_words_by_ref -n : cur prev words cword local command='runc' command_pos=0 subcommand_pos local counter=1 while [ $counter -lt $cword ]; do case "${words[$counter]}" in -*) ;; =) (( counter++ )) ;; *) command="${words[$counter]}" command_pos=$counter break ;; esac (( counter++ )) done local completions_func=_runc_${command} declare -F $completions_func >/dev/null && $completions_func eval "$previous_extglob_setting" return 0 } eval "$__runc_previous_extglob_setting" unset __runc_previous_extglob_setting complete -F 
_runc runc docker-runc-tags-docker-1.13.1/create.go000066400000000000000000000041411304443252500200060ustar00rootroot00000000000000package main import ( "fmt" "os" "github.com/urfave/cli" ) var createCommand = cli.Command{ Name: "create", Usage: "create a container", ArgsUsage: ` Where "" is your name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host.`, Description: `The create command creates an instance of a container for a bundle. The bundle is a directory with a specification file named "` + specConfig + `" and a root filesystem. The specification file includes an args parameter. The args parameter is used to specify command(s) that get run when the container is started. To change the command(s) that get executed on start, edit the args parameter of the spec. See "runc spec --help" for more explanation.`, Flags: []cli.Flag{ cli.StringFlag{ Name: "bundle, b", Value: "", Usage: `path to the root of the bundle directory, defaults to the current directory`, }, cli.StringFlag{ Name: "console", Value: "", Usage: "specify the pty slave path for use with the container", }, cli.StringFlag{ Name: "pid-file", Value: "", Usage: "specify the file to write the process id to", }, cli.BoolFlag{ Name: "no-pivot", Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk", }, cli.BoolFlag{ Name: "no-new-keyring", Usage: "do not create a new session keyring for the container. 
This will cause the container to inherit the calling processes session key", }, }, Action: func(context *cli.Context) error { if context.NArg() != 1 { fmt.Printf("Incorrect Usage.\n\n") cli.ShowCommandHelp(context, "create") return fmt.Errorf("runc: \"create\" requires exactly one argument") } spec, err := setupSpec(context) if err != nil { return err } status, err := startContainer(context, spec, true) if err != nil { return err } // exit with the container's exit status so any external supervisor is // notified of the exit with the correct exit status. os.Exit(status) return nil }, } docker-runc-tags-docker-1.13.1/delete.go000066400000000000000000000056311304443252500200120ustar00rootroot00000000000000// +build !solaris package main import ( "fmt" "os" "path/filepath" "syscall" "time" "github.com/opencontainers/runc/libcontainer" "github.com/urfave/cli" ) func killContainer(container libcontainer.Container) error { container.Signal(syscall.SIGKILL, false) for i := 0; i < 100; i++ { time.Sleep(100 * time.Millisecond) if err := container.Signal(syscall.Signal(0), false); err != nil { destroy(container) return nil } } return fmt.Errorf("container init still running") } var deleteCommand = cli.Command{ Name: "delete", Usage: "delete any resources held by one or more containers often used with detached containers", ArgsUsage: ` [container-id...] Where "" is the name for the instance of the container. 
EXAMPLE: For example, if the container id is "ubuntu01" and runc list currently shows the status of "ubuntu01" as "stopped" the following will delete resources held for "ubuntu01" removing "ubuntu01" from the runc list of containers: # runc delete ubuntu01`, Flags: []cli.Flag{ cli.BoolFlag{ Name: "force, f", Usage: "Forcibly deletes the container if it is still running (uses SIGKILL)", }, }, Action: func(context *cli.Context) error { hasError := false if !context.Args().Present() { return fmt.Errorf("runc: \"delete\" requires a minimum of 1 argument") } factory, err := loadFactory(context) if err != nil { return err } for _, id := range context.Args() { container, err := factory.Load(id) if err != nil { if lerr, ok := err.(libcontainer.Error); ok && lerr.Code() == libcontainer.ContainerNotExists { // if there was an aborted start or something of the sort then the container's directory could exist but // libcontainer does not see it because the state.json file inside that directory was never created. 
path := filepath.Join(context.GlobalString("root"), id) if err := os.RemoveAll(path); err != nil { fmt.Fprintf(os.Stderr, "remove %s: %v\n", path, err) } fmt.Fprintf(os.Stderr, "container %s does not exist\n", id) } hasError = true continue } s, err := container.Status() if err != nil { fmt.Fprintf(os.Stderr, "status for %s: %v\n", id, err) hasError = true continue } switch s { case libcontainer.Stopped: destroy(container) case libcontainer.Created: err := killContainer(container) if err != nil { fmt.Fprintf(os.Stderr, "kill container %s: %v\n", id, err) hasError = true } default: if context.Bool("force") { err := killContainer(container) if err != nil { fmt.Fprintf(os.Stderr, "kill container %s: %v\n", id, err) hasError = true } } else { fmt.Fprintf(os.Stderr, "cannot delete container %s that is not stopped: %s\n", id, s) hasError = true } } } if hasError { return fmt.Errorf("one or more of the container deletions failed") } return nil }, } docker-runc-tags-docker-1.13.1/events.go000066400000000000000000000166251304443252500200610ustar00rootroot00000000000000// +build linux package main import ( "encoding/json" "fmt" "os" "sync" "time" "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/urfave/cli" ) // event struct for encoding the event data to json. type event struct { Type string `json:"type"` ID string `json:"id"` Data interface{} `json:"data,omitempty"` } // stats is the runc specific stats structure for stability when encoding and decoding stats. 
type stats struct { Cpu cpu `json:"cpu"` Memory memory `json:"memory"` Pids pids `json:"pids"` Blkio blkio `json:"blkio"` Hugetlb map[string]hugetlb `json:"hugetlb"` } type hugetlb struct { Usage uint64 `json:"usage,omitempty"` Max uint64 `json:"max,omitempty"` Failcnt uint64 `json:"failcnt"` } type blkioEntry struct { Major uint64 `json:"major,omitempty"` Minor uint64 `json:"minor,omitempty"` Op string `json:"op,omitempty"` Value uint64 `json:"value,omitempty"` } type blkio struct { IoServiceBytesRecursive []blkioEntry `json:"ioServiceBytesRecursive,omitempty"` IoServicedRecursive []blkioEntry `json:"ioServicedRecursive,omitempty"` IoQueuedRecursive []blkioEntry `json:"ioQueueRecursive,omitempty"` IoServiceTimeRecursive []blkioEntry `json:"ioServiceTimeRecursive,omitempty"` IoWaitTimeRecursive []blkioEntry `json:"ioWaitTimeRecursive,omitempty"` IoMergedRecursive []blkioEntry `json:"ioMergedRecursive,omitempty"` IoTimeRecursive []blkioEntry `json:"ioTimeRecursive,omitempty"` SectorsRecursive []blkioEntry `json:"sectorsRecursive,omitempty"` } type pids struct { Current uint64 `json:"current,omitempty"` Limit uint64 `json:"limit,omitempty"` } type throttling struct { Periods uint64 `json:"periods,omitempty"` ThrottledPeriods uint64 `json:"throttledPeriods,omitempty"` ThrottledTime uint64 `json:"throttledTime,omitempty"` } type cpuUsage struct { // Units: nanoseconds. 
Total uint64 `json:"total,omitempty"` Percpu []uint64 `json:"percpu,omitempty"` Kernel uint64 `json:"kernel"` User uint64 `json:"user"` } type cpu struct { Usage cpuUsage `json:"usage,omitempty"` Throttling throttling `json:"throttling,omitempty"` } type memoryEntry struct { Limit uint64 `json:"limit"` Usage uint64 `json:"usage,omitempty"` Max uint64 `json:"max,omitempty"` Failcnt uint64 `json:"failcnt"` } type memory struct { Cache uint64 `json:"cache,omitempty"` Usage memoryEntry `json:"usage,omitempty"` Swap memoryEntry `json:"swap,omitempty"` Kernel memoryEntry `json:"kernel,omitempty"` KernelTCP memoryEntry `json:"kernelTCP,omitempty"` Raw map[string]uint64 `json:"raw,omitempty"` } var eventsCommand = cli.Command{ Name: "events", Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics", ArgsUsage: ` Where "" is the name for the instance of the container.`, Description: `The events command displays information about the container. By default the information is displayed once every 5 seconds.`, Flags: []cli.Flag{ cli.DurationFlag{Name: "interval", Value: 5 * time.Second, Usage: "set the stats collection interval"}, cli.BoolFlag{Name: "stats", Usage: "display the container's stats then exit"}, }, Action: func(context *cli.Context) error { container, err := getContainer(context) if err != nil { return err } duration := context.Duration("interval") if duration <= 0 { return fmt.Errorf("duration interval must be greater than 0") } status, err := container.Status() if err != nil { return err } if status == libcontainer.Stopped { return fmt.Errorf("container with id %s is not running", container.ID()) } var ( stats = make(chan *libcontainer.Stats, 1) events = make(chan *event, 1024) group = &sync.WaitGroup{} ) group.Add(1) go func() { defer group.Done() enc := json.NewEncoder(os.Stdout) for e := range events { if err := enc.Encode(e); err != nil { logrus.Error(err) } } }() if context.Bool("stats") { s, err := container.Stats() 
if err != nil { return err } events <- &event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)} close(events) group.Wait() return nil } go func() { for range time.Tick(context.Duration("interval")) { s, err := container.Stats() if err != nil { logrus.Error(err) continue } stats <- s } }() n, err := container.NotifyOOM() if err != nil { return err } for { select { case _, ok := <-n: if ok { // this means an oom event was received, if it is !ok then // the channel was closed because the container stopped and // the cgroups no longer exist. events <- &event{Type: "oom", ID: container.ID()} } else { n = nil } case s := <-stats: events <- &event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)} } if n == nil { close(events) break } } group.Wait() return nil }, } func convertLibcontainerStats(ls *libcontainer.Stats) *stats { cg := ls.CgroupStats if cg == nil { return nil } var s stats s.Pids.Current = cg.PidsStats.Current s.Pids.Limit = cg.PidsStats.Limit s.Cpu.Usage.Kernel = cg.CpuStats.CpuUsage.UsageInKernelmode s.Cpu.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode s.Cpu.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage s.Cpu.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage s.Cpu.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods s.Cpu.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods s.Cpu.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime s.Memory.Cache = cg.MemoryStats.Cache s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage) s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage) s.Memory.Swap = convertMemoryEntry(cg.MemoryStats.SwapUsage) s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage) s.Memory.Raw = cg.MemoryStats.Stats s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive) s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive) s.Blkio.IoQueuedRecursive = 
convertBlkioEntry(cg.BlkioStats.IoQueuedRecursive) s.Blkio.IoServiceTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceTimeRecursive) s.Blkio.IoWaitTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoWaitTimeRecursive) s.Blkio.IoMergedRecursive = convertBlkioEntry(cg.BlkioStats.IoMergedRecursive) s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive) s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive) s.Hugetlb = make(map[string]hugetlb) for k, v := range cg.HugetlbStats { s.Hugetlb[k] = convertHugtlb(v) } return &s } func convertHugtlb(c cgroups.HugetlbStats) hugetlb { return hugetlb{ Usage: c.Usage, Max: c.MaxUsage, Failcnt: c.Failcnt, } } func convertMemoryEntry(c cgroups.MemoryData) memoryEntry { return memoryEntry{ Limit: c.Limit, Usage: c.Usage, Max: c.MaxUsage, Failcnt: c.Failcnt, } } func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry { var out []blkioEntry for _, e := range c { out = append(out, blkioEntry{ Major: e.Major, Minor: e.Minor, Op: e.Op, Value: e.Value, }) } return out } docker-runc-tags-docker-1.13.1/exec.go000066400000000000000000000117201304443252500174700ustar00rootroot00000000000000// +build linux package main import ( "encoding/json" "fmt" "os" "strconv" "strings" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) var execCommand = cli.Command{ Name: "exec", Usage: "execute new process inside the container", ArgsUsage: ` [command options] Where "" is the name for the instance of the container and "" is the command to be executed in the container. 
EXAMPLE: For example, if the container is configured to run the linux ps command the following will output a list of processes running in the container: # runc exec ps`, Flags: []cli.Flag{ cli.StringFlag{ Name: "console", Usage: "specify the pty slave path for use with the container", }, cli.StringFlag{ Name: "cwd", Usage: "current working directory in the container", }, cli.StringSliceFlag{ Name: "env, e", Usage: "set environment variables", }, cli.BoolFlag{ Name: "tty, t", Usage: "allocate a pseudo-TTY", }, cli.StringFlag{ Name: "user, u", Usage: "UID (format: [:])", }, cli.StringFlag{ Name: "process, p", Usage: "path to the process.json", }, cli.BoolFlag{ Name: "detach,d", Usage: "detach from the container's process", }, cli.StringFlag{ Name: "pid-file", Value: "", Usage: "specify the file to write the process id to", }, cli.StringFlag{ Name: "process-label", Usage: "set the asm process label for the process commonly used with selinux", }, cli.StringFlag{ Name: "apparmor", Usage: "set the apparmor profile for the process", }, cli.BoolFlag{ Name: "no-new-privs", Usage: "set the no new privileges value for the process", }, cli.StringSliceFlag{ Name: "cap, c", Value: &cli.StringSlice{}, Usage: "add a capability to the bounding set for the process", }, cli.BoolFlag{ Name: "no-subreaper", Usage: "disable the use of the subreaper used to reap reparented processes", Hidden: true, }, }, Action: func(context *cli.Context) error { if os.Geteuid() != 0 { return fmt.Errorf("runc should be run as root") } status, err := execProcess(context) if err == nil { os.Exit(status) } return fmt.Errorf("exec failed: %v", err) }, SkipArgReorder: true, } func execProcess(context *cli.Context) (int, error) { container, err := getContainer(context) if err != nil { return -1, err } status, err := container.Status() if err != nil { return -1, err } if status == libcontainer.Stopped { return -1, fmt.Errorf("cannot exec a container that has run and stopped") } path := context.String("process") 
if path == "" && len(context.Args()) == 1 { return -1, fmt.Errorf("process args cannot be empty") } detach := context.Bool("detach") state, err := container.State() if err != nil { return -1, err } bundle := utils.SearchLabels(state.Config.Labels, "bundle") p, err := getProcess(context, bundle) if err != nil { return -1, err } r := &runner{ enableSubreaper: false, shouldDestroy: false, container: container, console: context.String("console"), detach: detach, pidFile: context.String("pid-file"), } return r.run(p) } func getProcess(context *cli.Context, bundle string) (*specs.Process, error) { if path := context.String("process"); path != "" { f, err := os.Open(path) if err != nil { return nil, err } defer f.Close() var p specs.Process if err := json.NewDecoder(f).Decode(&p); err != nil { return nil, err } return &p, validateProcessSpec(&p) } // process via cli flags if err := os.Chdir(bundle); err != nil { return nil, err } spec, err := loadSpec(specConfig) if err != nil { return nil, err } p := spec.Process p.Args = context.Args()[1:] // override the cwd, if passed if context.String("cwd") != "" { p.Cwd = context.String("cwd") } if ap := context.String("apparmor"); ap != "" { p.ApparmorProfile = ap } if l := context.String("process-label"); l != "" { p.SelinuxLabel = l } if caps := context.StringSlice("cap"); len(caps) > 0 { p.Capabilities = caps } // append the passed env variables for _, e := range context.StringSlice("env") { p.Env = append(p.Env, e) } // set the tty if context.IsSet("tty") { p.Terminal = context.Bool("tty") } if context.IsSet("no-new-privs") { p.NoNewPrivileges = context.Bool("no-new-privs") } // override the user, if passed if context.String("user") != "" { u := strings.SplitN(context.String("user"), ":", 2) if len(u) > 1 { gid, err := strconv.Atoi(u[1]) if err != nil { return nil, fmt.Errorf("parsing %s as int for gid failed: %v", u[1], err) } p.User.GID = uint32(gid) } uid, err := strconv.Atoi(u[0]) if err != nil { return nil, 
fmt.Errorf("parsing %s as int for uid failed: %v", u[0], err) } p.User.UID = uint32(uid) } return &p, nil } docker-runc-tags-docker-1.13.1/kill.go000066400000000000000000000050561304443252500175040ustar00rootroot00000000000000// +build linux package main import ( "fmt" "strconv" "strings" "syscall" "github.com/urfave/cli" ) var signalMap = map[string]syscall.Signal{ "ABRT": syscall.SIGABRT, "ALRM": syscall.SIGALRM, "BUS": syscall.SIGBUS, "CHLD": syscall.SIGCHLD, "CLD": syscall.SIGCLD, "CONT": syscall.SIGCONT, "FPE": syscall.SIGFPE, "HUP": syscall.SIGHUP, "ILL": syscall.SIGILL, "INT": syscall.SIGINT, "IO": syscall.SIGIO, "IOT": syscall.SIGIOT, "KILL": syscall.SIGKILL, "PIPE": syscall.SIGPIPE, "POLL": syscall.SIGPOLL, "PROF": syscall.SIGPROF, "PWR": syscall.SIGPWR, "QUIT": syscall.SIGQUIT, "SEGV": syscall.SIGSEGV, "STKFLT": syscall.SIGSTKFLT, "STOP": syscall.SIGSTOP, "SYS": syscall.SIGSYS, "TERM": syscall.SIGTERM, "TRAP": syscall.SIGTRAP, "TSTP": syscall.SIGTSTP, "TTIN": syscall.SIGTTIN, "TTOU": syscall.SIGTTOU, "UNUSED": syscall.SIGUNUSED, "URG": syscall.SIGURG, "USR1": syscall.SIGUSR1, "USR2": syscall.SIGUSR2, "VTALRM": syscall.SIGVTALRM, "WINCH": syscall.SIGWINCH, "XCPU": syscall.SIGXCPU, "XFSZ": syscall.SIGXFSZ, } var killCommand = cli.Command{ Name: "kill", Usage: "kill sends the specified signal (default: SIGTERM) to the container's init process", ArgsUsage: ` Where "" is the name for the instance of the container and "" is the signal to be sent to the init process. 
EXAMPLE: For example, if the container id is "ubuntu01" the following will send a "KILL" signal to the init process of the "ubuntu01" container: # runc kill ubuntu01 KILL`, Flags: []cli.Flag{ cli.BoolFlag{ Name: "all, a", Usage: "send the specified signal to all processes inside the container", }, }, Action: func(context *cli.Context) error { container, err := getContainer(context) if err != nil { return err } sigstr := context.Args().Get(1) if sigstr == "" { sigstr = "SIGTERM" } signal, err := parseSignal(sigstr) if err != nil { return err } if err := container.Signal(signal, context.Bool("all")); err != nil { return err } return nil }, } func parseSignal(rawSignal string) (syscall.Signal, error) { s, err := strconv.Atoi(rawSignal) if err == nil { sig := syscall.Signal(s) for _, msig := range signalMap { if sig == msig { return sig, nil } } return -1, fmt.Errorf("unknown signal %q", rawSignal) } signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")] if !ok { return -1, fmt.Errorf("unknown signal %q", rawSignal) } return signal, nil } docker-runc-tags-docker-1.13.1/libcontainer/000077500000000000000000000000001304443252500206655ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/README.md000066400000000000000000000143341304443252500221510ustar00rootroot00000000000000Libcontainer provides a native Go implementation for creating containers with namespaces, cgroups, capabilities, and filesystem access controls. It allows you to manage the lifecycle of the container performing additional operations after the container is created. #### Container A container is a self contained execution environment that shares the kernel of the host system and which is (optionally) isolated from other containers in the system. #### Using libcontainer Because containers are spawned in a two step process you will need a binary that will be executed as the init process for the container. 
In libcontainer, we use the current binary (/proc/self/exe) to be executed as the init process, and use arg "init", we call the first step process "bootstrap", so you always need a "init" function as the entry of "bootstrap". ```go func init() { if len(os.Args) > 1 && os.Args[1] == "init" { runtime.GOMAXPROCS(1) runtime.LockOSThread() factory, _ := libcontainer.New("") if err := factory.StartInitialization(); err != nil { logrus.Fatal(err) } panic("--this line should have never been executed, congratulations--") } } ``` Then to create a container you first have to initialize an instance of a factory that will handle the creation and initialization for a container. ```go factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init")) if err != nil { logrus.Fatal(err) return } ``` Once you have an instance of the factory created we can create a configuration struct describing how the container is to be created. A sample would look similar to this: ```go defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV config := &configs.Config{ Rootfs: "/your/path/to/rootfs", Capabilities: []string{ "CAP_CHOWN", "CAP_DAC_OVERRIDE", "CAP_FSETID", "CAP_FOWNER", "CAP_MKNOD", "CAP_NET_RAW", "CAP_SETGID", "CAP_SETUID", "CAP_SETFCAP", "CAP_SETPCAP", "CAP_NET_BIND_SERVICE", "CAP_SYS_CHROOT", "CAP_KILL", "CAP_AUDIT_WRITE", }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: configs.NEWNS}, {Type: configs.NEWUTS}, {Type: configs.NEWIPC}, {Type: configs.NEWPID}, {Type: configs.NEWUSER}, {Type: configs.NEWNET}, }), Cgroups: &configs.Cgroup{ Name: "test-container", Parent: "system", Resources: &configs.Resources{ MemorySwappiness: nil, AllowAllDevices: nil, AllowedDevices: configs.DefaultAllowedDevices, }, }, MaskPaths: []string{ "/proc/kcore", "/sys/firmware", }, ReadonlyPaths: []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", }, Devices: configs.DefaultAutoCreatedDevices, 
Hostname: "testing", Mounts: []*configs.Mount{ { Source: "proc", Destination: "/proc", Device: "proc", Flags: defaultMountFlags, }, { Source: "tmpfs", Destination: "/dev", Device: "tmpfs", Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, Data: "mode=755", }, { Source: "devpts", Destination: "/dev/pts", Device: "devpts", Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", }, { Device: "tmpfs", Source: "shm", Destination: "/dev/shm", Data: "mode=1777,size=65536k", Flags: defaultMountFlags, }, { Source: "mqueue", Destination: "/dev/mqueue", Device: "mqueue", Flags: defaultMountFlags, }, { Source: "sysfs", Destination: "/sys", Device: "sysfs", Flags: defaultMountFlags | syscall.MS_RDONLY, }, }, UidMappings: []configs.IDMap{ { ContainerID: 0, HostID: 1000, Size: 65536, }, }, GidMappings: []configs.IDMap{ { ContainerID: 0, HostID: 1000, Size: 65536, }, }, Networks: []*configs.Network{ { Type: "loopback", Address: "127.0.0.1/0", Gateway: "localhost", }, }, Rlimits: []configs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, Hard: uint64(1025), Soft: uint64(1025), }, }, } ``` Once you have the configuration populated you can create a container: ```go container, err := factory.Create("container-id", config) if err != nil { logrus.Fatal(err) return } ``` To spawn bash as the initial process inside the container and have the processes pid returned in order to wait, signal, or kill the process: ```go process := &libcontainer.Process{ Args: []string{"/bin/bash"}, Env: []string{"PATH=/bin"}, User: "daemon", Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, } err := container.Run(process) if err != nil { container.Destroy() logrus.Fatal(err) return } // wait for the process to finish. _, err := process.Wait() if err != nil { logrus.Fatal(err) } // destroy the container. container.Destroy() ``` Additional ways to interact with a running container are: ```go // return all the pids for all processes running inside the container. 
processes, err := container.Processes() // get detailed cpu, memory, io, and network statistics for the container and // it's processes. stats, err := container.Stats() // pause all processes inside the container. container.Pause() // resume all paused processes. container.Resume() // send signal to container's init process. container.Signal(signal) // update container resource constraints. container.Set(config) // get current status of the container. status, err := container.Status() // get current container's state information. state, err := container.State() ``` #### Checkpoint & Restore libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers. This let's you save the state of a process running inside a container to disk, and then restore that state into a new process, on the same machine or on another machine. `criu` version 1.5.2 or higher is required to use checkpoint and restore. If you don't already have `criu` installed, you can build it from source, following the [online instructions](http://criu.org/Installation). `criu` is also installed in the docker image generated when building libcontainer with docker. ## Copyright and license Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license. Docs released under Creative commons. docker-runc-tags-docker-1.13.1/libcontainer/SPEC.md000066400000000000000000000317471304443252500217550ustar00rootroot00000000000000## Container Specification - v1 This is the standard configuration for version 1 containers. It includes namespaces, standard filesystem setup, a default Linux capability set, and information about resource reservations. It also has information about any populated environment settings for the processes running inside a container. Along with the configuration of how a container is created the standard also discusses actions that can be performed on a container to manage and inspect information about the processes running inside. 
The v1 profile is meant to be able to accommodate the majority of applications with a strong security configuration. ### System Requirements and Compatibility Minimum requirements: * Kernel version - 3.10 recommended 2.6.2x minimum(with backported patches) * Mounted cgroups with each subsystem in its own hierarchy ### Namespaces | Flag | Enabled | | ------------ | ------- | | CLONE_NEWPID | 1 | | CLONE_NEWUTS | 1 | | CLONE_NEWIPC | 1 | | CLONE_NEWNET | 1 | | CLONE_NEWNS | 1 | | CLONE_NEWUSER | 1 | Namespaces are created for the container via the `clone` syscall. ### Filesystem A root filesystem must be provided to a container for execution. The container will use this root filesystem (rootfs) to jail and spawn processes inside where the binaries and system libraries are local to that directory. Any binaries to be executed must be contained within this rootfs. Mounts that happen inside the container are automatically cleaned up when the container exits as the mount namespace is destroyed and the kernel will unmount all the mounts that were setup within that namespace. For a container to execute properly there are certain filesystems that are required to be mounted within the rootfs that the runtime will setup. | Path | Type | Flags | Data | | ----------- | ------ | -------------------------------------- | ---------------------------------------- | | /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | | /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | | /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | | /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | | /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 | | /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | After a container's filesystems are mounted within the newly created mount namespace `/dev` will need to be populated with a set of device nodes. 
It is expected that a rootfs does not need to have any device nodes specified for `/dev` within the rootfs as the container will setup the correct devices that are required for executing a container's process. | Path | Mode | Access | | ------------ | ---- | ---------- | | /dev/null | 0666 | rwm | | /dev/zero | 0666 | rwm | | /dev/full | 0666 | rwm | | /dev/tty | 0666 | rwm | | /dev/random | 0666 | rwm | | /dev/urandom | 0666 | rwm | **ptmx** `/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within the container. The use of a pseudo TTY is optional within a container and it should support both. If a pseudo is provided to the container `/dev/console` will need to be setup by binding the console in `/dev/` after it has been populated and mounted in tmpfs. | Source | Destination | UID GID | Mode | Type | | --------------- | ------------ | ------- | ---- | ---- | | *pty host path* | /dev/console | 0 0 | 0600 | bind | After `/dev/null` has been setup we check for any external links between the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing to `/dev/null` outside the container we close and `dup2` the `/dev/null` that is local to the container's rootfs. After the container has `/proc` mounted a few standard symlinks are setup within `/dev/` for the io. | Source | Destination | | --------------- | ----------- | | /proc/self/fd | /dev/fd | | /proc/self/fd/0 | /dev/stdin | | /proc/self/fd/1 | /dev/stdout | | /proc/self/fd/2 | /dev/stderr | A `pivot_root` is used to change the root for the process, effectively jailing the process inside the rootfs. ```c put_old = mkdir(...); pivot_root(rootfs, put_old); chdir("/"); unmount(put_old, MS_DETACH); rmdir(put_old); ``` For container's running with a rootfs inside `ramfs` a `MS_MOVE` combined with a `chroot` is required as `pivot_root` is not supported in `ramfs`. 
```c mount(rootfs, "/", NULL, MS_MOVE, NULL); chroot("."); chdir("/"); ``` The `umask` is set back to `0022` after the filesystem setup has been completed. ### Resources Cgroups are used to handle resource allocation for containers. This includes system resources like cpu, memory, and device access. | Subsystem | Enabled | | ---------- | ------- | | devices | 1 | | memory | 1 | | cpu | 1 | | cpuacct | 1 | | cpuset | 1 | | blkio | 1 | | perf_event | 1 | | freezer | 1 | | hugetlb | 1 | | pids | 1 | All cgroup subsystem are joined so that statistics can be collected from each of the subsystems. Freezer does not expose any stats but is joined so that containers can be paused and resumed. The parent process of the container's init must place the init pid inside the correct cgroups before the initialization begins. This is done so that no processes or threads escape the cgroups. This sync is done via a pipe ( specified in the runtime section below ) that the container's init process will block waiting for the parent to finish setup. ### Security The standard set of Linux capabilities that are set in a container provide a good default for security and flexibility for the applications. 
| Capability | Enabled | | -------------------- | ------- | | CAP_NET_RAW | 1 | | CAP_NET_BIND_SERVICE | 1 | | CAP_AUDIT_READ | 1 | | CAP_AUDIT_WRITE | 1 | | CAP_DAC_OVERRIDE | 1 | | CAP_SETFCAP | 1 | | CAP_SETPCAP | 1 | | CAP_SETGID | 1 | | CAP_SETUID | 1 | | CAP_MKNOD | 1 | | CAP_CHOWN | 1 | | CAP_FOWNER | 1 | | CAP_FSETID | 1 | | CAP_KILL | 1 | | CAP_SYS_CHROOT | 1 | | CAP_NET_BROADCAST | 0 | | CAP_SYS_MODULE | 0 | | CAP_SYS_RAWIO | 0 | | CAP_SYS_PACCT | 0 | | CAP_SYS_ADMIN | 0 | | CAP_SYS_NICE | 0 | | CAP_SYS_RESOURCE | 0 | | CAP_SYS_TIME | 0 | | CAP_SYS_TTY_CONFIG | 0 | | CAP_AUDIT_CONTROL | 0 | | CAP_MAC_OVERRIDE | 0 | | CAP_MAC_ADMIN | 0 | | CAP_NET_ADMIN | 0 | | CAP_SYSLOG | 0 | | CAP_DAC_READ_SEARCH | 0 | | CAP_LINUX_IMMUTABLE | 0 | | CAP_IPC_LOCK | 0 | | CAP_IPC_OWNER | 0 | | CAP_SYS_PTRACE | 0 | | CAP_SYS_BOOT | 0 | | CAP_LEASE | 0 | | CAP_WAKE_ALARM | 0 | | CAP_BLOCK_SUSPEND | 0 | Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor) and [selinux](http://selinuxproject.org/page/Main_Page) can be used with the containers. A container should support setting an apparmor profile or selinux process and mount labels if provided in the configuration. Standard apparmor profile: ```c #include profile flags=(attach_disconnected,mediate_deleted) { #include network, capability, file, umount, deny @{PROC}/sys/fs/** wklx, deny @{PROC}/sysrq-trigger rwklx, deny @{PROC}/mem rwklx, deny @{PROC}/kmem rwklx, deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, deny @{PROC}/sys/kernel/*/** wklx, deny mount, deny /sys/[^f]*/** wklx, deny /sys/f[^s]*/** wklx, deny /sys/fs/[^c]*/** wklx, deny /sys/fs/c[^g]*/** wklx, deny /sys/fs/cg[^r]*/** wklx, deny /sys/firmware/efi/efivars/** rwklx, deny /sys/kernel/security/** rwklx, } ``` *TODO: seccomp work is being done to find a good default config* ### Runtime and Init Process During container creation the parent process needs to talk to the container's init process and have a form of synchronization. 
This is accomplished by creating a pipe that is passed to the container's init. When the init process first spawns it will block on its side of the pipe until the parent closes its side. This allows the parent to have time to set the new process inside a cgroup hierarchy and/or write any uid/gid mappings required for user namespaces. The pipe is passed to the init process via FD 3. The application consuming libcontainer should be compiled statically. libcontainer does not define any init process and the arguments provided are used to `exec` the process inside the application. There should be no long running init within the container spec. If a pseudo tty is provided to a container it will open and `dup2` the console as the container's STDIN, STDOUT, STDERR as well as mounting the console as `/dev/console`. An extra set of mounts are provided to a container and setup for use. A container's rootfs can contain some non portable files inside that can cause side effects during execution of a process. These files are usually created and populated with the container specific information via the runtime. **Extra runtime files:** * /etc/hosts * /etc/resolv.conf * /etc/hostname * /etc/localtime #### Defaults There are a few defaults that can be overridden by users, but in their omission these apply to processes within a container. | Type | Value | | ------------------- | ------------------------------ | | Parent Death Signal | SIGKILL | | UID | 0 | | GID | 0 | | GROUPS | 0, NULL | | CWD | "/" | | $HOME | Current user's home dir or "/" | | Readonly rootfs | false | | Pseudo TTY | false | ## Actions After a container is created there is a standard set of actions that can be done to the container. These actions are part of the public API for a container. 
| Action | Description | | -------------- | ------------------------------------------------------------------ | | Get processes | Return all the pids for processes running inside a container | | Get Stats | Return resource statistics for the container as a whole | | Wait | Waits on the container's init process ( pid 1 ) | | Wait Process | Wait on any of the container's processes returning the exit status | | Destroy | Kill the container's init process and remove any filesystem state | | Signal | Send a signal to the container's init process | | Signal Process | Send a signal to any of the container's processes | | Pause | Pause all processes inside the container | | Resume | Resume all processes inside the container if paused | | Exec | Execute a new process inside of the container ( requires setns ) | | Set | Setup configs of the container after it's created | ### Execute a new process inside of a running container. User can execute a new process inside of a running container. Any binaries to be executed must be accessible within the container's rootfs. The started process will run inside the container's rootfs. Any changes made by the process to the container's filesystem will persist after the process finished executing. The started process will join all the container's existing namespaces. When the container is paused, the process will also be paused and will resume when the container is unpaused. The started process will only run when the container's primary process (PID 1) is running, and will not be restarted when the container is restarted. #### Planned additions The started process will have its own cgroups nested inside the container's cgroups. This is used for process tracking and optionally resource allocation handling for the new process. Freezer cgroup is required, the rest of the cgroups are optional. The process executor must place its pid inside the correct cgroups before starting the process. 
This is done so that no child processes or threads can escape the cgroups. When the process is stopped, the process executor will try (in a best-effort way) to stop all its children and remove the sub-cgroups. docker-runc-tags-docker-1.13.1/libcontainer/apparmor/000077500000000000000000000000001304443252500225065ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/apparmor/apparmor.go000066400000000000000000000016751304443252500246670ustar00rootroot00000000000000// +build apparmor,linux package apparmor // #cgo LDFLAGS: -lapparmor // #include // #include import "C" import ( "fmt" "io/ioutil" "os" "unsafe" ) // IsEnabled returns true if apparmor is enabled for the host. func IsEnabled() bool { if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" { if _, err = os.Stat("/sbin/apparmor_parser"); err == nil { buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") return err == nil && len(buf) > 1 && buf[0] == 'Y' } } return false } // ApplyProfile will apply the profile with the specified name to the process after // the next exec. 
func ApplyProfile(name string) error { if name == "" { return nil } cName := C.CString(name) defer C.free(unsafe.Pointer(cName)) if _, err := C.aa_change_onexec(cName); err != nil { return fmt.Errorf("apparmor failed to apply profile: %s", err) } return nil } docker-runc-tags-docker-1.13.1/libcontainer/apparmor/apparmor_disabled.go000066400000000000000000000004651304443252500265120ustar00rootroot00000000000000// +build !apparmor !linux package apparmor import ( "errors" ) var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported") func IsEnabled() bool { return false } func ApplyProfile(name string) error { if name != "" { return ErrApparmorNotEnabled } return nil } docker-runc-tags-docker-1.13.1/libcontainer/capabilities_ambient.go000066400000000000000000000002641304443252500253460ustar00rootroot00000000000000// +build linux,ambient package libcontainer import "github.com/syndtr/gocapability/capability" const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS docker-runc-tags-docker-1.13.1/libcontainer/capabilities_linux.go000066400000000000000000000027641304443252500250750ustar00rootroot00000000000000// +build linux package libcontainer import ( "fmt" "os" "strings" "github.com/syndtr/gocapability/capability" ) var capabilityMap map[string]capability.Cap func init() { capabilityMap = make(map[string]capability.Cap) last := capability.CAP_LAST_CAP // workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap if last == capability.Cap(63) { last = capability.CAP_BLOCK_SUSPEND } for _, cap := range capability.List() { if cap > last { continue } capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String())) capabilityMap[capKey] = cap } } func newCapWhitelist(caps []string) (*whitelist, error) { l := []capability.Cap{} for _, c := range caps { v, ok := capabilityMap[c] if !ok { return nil, fmt.Errorf("unknown capability %q", c) } l = append(l, v) } pid, err := capability.NewPid(os.Getpid()) if err != nil 
{ return nil, err } return &whitelist{ keep: l, pid: pid, }, nil } type whitelist struct { pid capability.Capabilities keep []capability.Cap } // dropBoundingSet drops the capability bounding set to those specified in the whitelist. func (w *whitelist) dropBoundingSet() error { w.pid.Clear(capability.BOUNDS) w.pid.Set(capability.BOUNDS, w.keep...) return w.pid.Apply(capability.BOUNDS) } // drop drops all capabilities for the current process except those specified in the whitelist. func (w *whitelist) drop() error { w.pid.Clear(allCapabilityTypes) w.pid.Set(allCapabilityTypes, w.keep...) return w.pid.Apply(allCapabilityTypes) } docker-runc-tags-docker-1.13.1/libcontainer/capabilities_noambient.go000066400000000000000000000002431304443252500257000ustar00rootroot00000000000000// +build !ambient,linux package libcontainer import "github.com/syndtr/gocapability/capability" const allCapabilityTypes = capability.CAPS | capability.BOUNDS docker-runc-tags-docker-1.13.1/libcontainer/cgroups/000077500000000000000000000000001304443252500223475ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/cgroups/cgroups.go000066400000000000000000000026531304443252500243660ustar00rootroot00000000000000// +build linux package cgroups import ( "fmt" "github.com/opencontainers/runc/libcontainer/configs" ) type Manager interface { // Applies cgroup configuration to the process with the specified pid Apply(pid int) error // Returns the PIDs inside the cgroup set GetPids() ([]int, error) // Returns the PIDs inside the cgroup set & all sub-cgroups GetAllPids() ([]int, error) // Returns statistics for the cgroup set GetStats() (*Stats, error) // Toggles the freezer cgroup according with specified state Freeze(state configs.FreezerState) error // Destroys the cgroup set Destroy() error // NewCgroupManager() and LoadCgroupManager() require following attributes: // Paths map[string]string // Cgroups *cgroups.Cgroup // Paths maps cgroup subsystem to path at which it is mounted. 
// Cgroups specifies specific cgroup settings for the various subsystems // Returns cgroup paths to save in a state file and to be able to // restore the object later. GetPaths() map[string]string // Sets the cgroup as configured. Set(container *configs.Config) error } type NotFoundError struct { Subsystem string } func (e *NotFoundError) Error() string { return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) } func NewNotFoundError(sub string) error { return &NotFoundError{ Subsystem: sub, } } func IsNotFound(err error) bool { if err == nil { return false } _, ok := err.(*NotFoundError) return ok } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/cgroups_test.go000066400000000000000000000003541304443252500254210ustar00rootroot00000000000000// +build linux package cgroups import ( "testing" ) func TestParseCgroups(t *testing.T) { cgroups, err := ParseCgroupFile("/proc/self/cgroup") if err != nil { t.Fatal(err) } if _, ok := cgroups["cpu"]; !ok { t.Fail() } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/cgroups_unsupported.go000066400000000000000000000000421304443252500270240ustar00rootroot00000000000000// +build !linux package cgroups docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/000077500000000000000000000000001304443252500227575ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/apply_raw.go000066400000000000000000000210731304443252500253070ustar00rootroot00000000000000// +build linux package fs import ( "errors" "fmt" "io" "io/ioutil" "os" "path/filepath" "sync" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" ) var ( subsystems = subsystemSet{ &CpusetGroup{}, &DevicesGroup{}, &MemoryGroup{}, &CpuGroup{}, &CpuacctGroup{}, &PidsGroup{}, &BlkioGroup{}, &HugetlbGroup{}, &NetClsGroup{}, &NetPrioGroup{}, &PerfEventGroup{}, &FreezerGroup{}, &NameGroup{GroupName: "name=systemd", Join: 
true}, } HugePageSizes, _ = cgroups.GetHugePageSize() ) var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") type subsystemSet []subsystem func (s subsystemSet) Get(name string) (subsystem, error) { for _, ss := range s { if ss.Name() == name { return ss, nil } } return nil, errSubsystemDoesNotExist } type subsystem interface { // Name returns the name of the subsystem. Name() string // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Removes the cgroup represented by 'cgroupData'. Remove(*cgroupData) error // Creates and joins the cgroup represented by 'cgroupData'. Apply(*cgroupData) error // Set the cgroup represented by cgroup. Set(path string, cgroup *configs.Cgroup) error } type Manager struct { mu sync.Mutex Cgroups *configs.Cgroup Paths map[string]string } // The absolute path to the root of the cgroup hierarchies. var cgroupRootLock sync.Mutex var cgroupRoot string // Gets the cgroupRoot. 
func getCgroupRoot() (string, error) { cgroupRootLock.Lock() defer cgroupRootLock.Unlock() if cgroupRoot != "" { return cgroupRoot, nil } root, err := cgroups.FindCgroupMountpointDir() if err != nil { return "", err } if _, err := os.Stat(root); err != nil { return "", err } cgroupRoot = root return cgroupRoot, nil } type cgroupData struct { root string innerPath string config *configs.Cgroup pid int } func (m *Manager) Apply(pid int) (err error) { if m.Cgroups == nil { return nil } m.mu.Lock() defer m.mu.Unlock() var c = m.Cgroups d, err := getCgroupData(m.Cgroups, pid) if err != nil { return err } if c.Paths != nil { paths := make(map[string]string) for name, path := range c.Paths { _, err := d.path(name) if err != nil { if cgroups.IsNotFound(err) { continue } return err } paths[name] = path } m.Paths = paths return cgroups.EnterPid(m.Paths, pid) } paths := make(map[string]string) for _, sys := range subsystems { if err := sys.Apply(d); err != nil { return err } // TODO: Apply should, ideally, be reentrant or be broken up into a separate // create and join phase so that the cgroup hierarchy for a container can be // created then join consists of writing the process pids to cgroup.procs p, err := d.path(sys.Name()) if err != nil { // The non-presence of the devices subsystem is // considered fatal for security reasons. 
if cgroups.IsNotFound(err) && sys.Name() != "devices" { continue } return err } paths[sys.Name()] = p } m.Paths = paths return nil } func (m *Manager) Destroy() error { if m.Cgroups.Paths != nil { return nil } m.mu.Lock() defer m.mu.Unlock() if err := cgroups.RemovePaths(m.Paths); err != nil { return err } m.Paths = make(map[string]string) return nil } func (m *Manager) GetPaths() map[string]string { m.mu.Lock() paths := m.Paths m.mu.Unlock() return paths } func (m *Manager) GetStats() (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() stats := cgroups.NewStats() for name, path := range m.Paths { sys, err := subsystems.Get(name) if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { continue } if err := sys.GetStats(path, stats); err != nil { return nil, err } } return stats, nil } func (m *Manager) Set(container *configs.Config) error { // If Paths are set, then we are just joining cgroups paths // and there is no need to set any values. if m.Cgroups.Paths != nil { return nil } paths := m.GetPaths() for _, sys := range subsystems { path := paths[sys.Name()] if err := sys.Set(path, container.Cgroups); err != nil { return err } } if m.Paths["cpu"] != "" { if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { return err } } return nil } // Freeze toggles the container's freezer cgroup depending on the state // provided func (m *Manager) Freeze(state configs.FreezerState) error { paths := m.GetPaths() dir := paths["freezer"] prevState := m.Cgroups.Resources.Freezer m.Cgroups.Resources.Freezer = state freezer, err := subsystems.Get("freezer") if err != nil { return err } err = freezer.Set(dir, m.Cgroups) if err != nil { m.Cgroups.Resources.Freezer = prevState return err } return nil } func (m *Manager) GetPids() ([]int, error) { paths := m.GetPaths() return cgroups.GetPids(paths["devices"]) } func (m *Manager) GetAllPids() ([]int, error) { paths := m.GetPaths() return cgroups.GetAllPids(paths["devices"]) } func 
getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { root, err := getCgroupRoot() if err != nil { return nil, err } if (c.Name != "" || c.Parent != "") && c.Path != "" { return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used") } // XXX: Do not remove this code. Path safety is important! -- cyphar cgPath := libcontainerUtils.CleanPath(c.Path) cgParent := libcontainerUtils.CleanPath(c.Parent) cgName := libcontainerUtils.CleanPath(c.Name) innerPath := cgPath if innerPath == "" { innerPath = filepath.Join(cgParent, cgName) } return &cgroupData{ root: root, innerPath: innerPath, config: c, pid: pid, }, nil } func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) { // Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating // process could in container and shared pid namespace with host, and // /proc/1/cgroup could point to whole other world of cgroups. initPath, err := cgroups.GetThisCgroupDir(subsystem) if err != nil { return "", err } // This is needed for nested containers, because in /proc/self/cgroup we // see pathes from host, which don't exist in container. relDir, err := filepath.Rel(root, initPath) if err != nil { return "", err } return filepath.Join(mountpoint, relDir), nil } func (raw *cgroupData) path(subsystem string) (string, error) { mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem) // If we didn't mount the subsystem, there is no point we make the path. if err != nil { return "", err } // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. if filepath.IsAbs(raw.innerPath) { // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. 
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil } parentPath, err := raw.parentPath(subsystem, mnt, root) if err != nil { return "", err } return filepath.Join(parentPath, raw.innerPath), nil } func (raw *cgroupData) join(subsystem string) (string, error) { path, err := raw.path(subsystem) if err != nil { return "", err } if err := os.MkdirAll(path, 0755); err != nil { return "", err } if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil { return "", err } return path, nil } func writeFile(dir, file, data string) error { // Normally dir should not be empty, one case is that cgroup subsystem // is not mounted, we will get empty dir, and we want it fail here. if dir == "" { return fmt.Errorf("no such directory for %s", file) } if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil { return fmt.Errorf("failed to write %v to %v: %v", data, file, err) } return nil } func readFile(dir, file string) (string, error) { data, err := ioutil.ReadFile(filepath.Join(dir, file)) return string(data), err } func removePath(p string, err error) error { if err != nil { return err } if p != "" { return os.RemoveAll(p) } return nil } func CheckCpushares(path string, c int64) error { var cpuShares int64 if c == 0 { return nil } fd, err := os.Open(filepath.Join(path, "cpu.shares")) if err != nil { return err } defer fd.Close() _, err = fmt.Fscanf(fd, "%d", &cpuShares) if err != nil && err != io.EOF { return err } if c > cpuShares { return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares) } else if c < cpuShares { return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares) } return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/apply_raw_test.go000066400000000000000000000170231304443252500263460ustar00rootroot00000000000000// +build linux package fs import ( "path/filepath" "strings" "testing" "github.com/opencontainers/runc/libcontainer/configs" ) func TestInvalidCgroupPath(t *testing.T) { 
root, err := getCgroupRoot() if err != nil { t.Errorf("couldn't get cgroup root: %v", err) } config := &configs.Cgroup{ Path: "../../../../../../../../../../some/path", } data, err := getCgroupData(config, 0) if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } // Make sure the final innerPath doesn't go outside the cgroup mountpoint. if strings.HasPrefix(data.innerPath, "..") { t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") } // Double-check, using an actual cgroup. deviceRoot := filepath.Join(root, "devices") devicePath, err := data.path("devices") if err != nil { t.Errorf("couldn't get cgroup path: %v", err) } if !strings.HasPrefix(devicePath, deviceRoot) { t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") } } func TestInvalidAbsoluteCgroupPath(t *testing.T) { root, err := getCgroupRoot() if err != nil { t.Errorf("couldn't get cgroup root: %v", err) } config := &configs.Cgroup{ Path: "/../../../../../../../../../../some/path", } data, err := getCgroupData(config, 0) if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } // Make sure the final innerPath doesn't go outside the cgroup mountpoint. if strings.HasPrefix(data.innerPath, "..") { t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") } // Double-check, using an actual cgroup. deviceRoot := filepath.Join(root, "devices") devicePath, err := data.path("devices") if err != nil { t.Errorf("couldn't get cgroup path: %v", err) } if !strings.HasPrefix(devicePath, deviceRoot) { t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") } } // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. 
func TestInvalidCgroupParent(t *testing.T) { root, err := getCgroupRoot() if err != nil { t.Errorf("couldn't get cgroup root: %v", err) } config := &configs.Cgroup{ Parent: "../../../../../../../../../../some/path", Name: "name", } data, err := getCgroupData(config, 0) if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } // Make sure the final innerPath doesn't go outside the cgroup mountpoint. if strings.HasPrefix(data.innerPath, "..") { t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") } // Double-check, using an actual cgroup. deviceRoot := filepath.Join(root, "devices") devicePath, err := data.path("devices") if err != nil { t.Errorf("couldn't get cgroup path: %v", err) } if !strings.HasPrefix(devicePath, deviceRoot) { t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") } } // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. func TestInvalidAbsoluteCgroupParent(t *testing.T) { root, err := getCgroupRoot() if err != nil { t.Errorf("couldn't get cgroup root: %v", err) } config := &configs.Cgroup{ Parent: "/../../../../../../../../../../some/path", Name: "name", } data, err := getCgroupData(config, 0) if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } // Make sure the final innerPath doesn't go outside the cgroup mountpoint. if strings.HasPrefix(data.innerPath, "..") { t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") } // Double-check, using an actual cgroup. deviceRoot := filepath.Join(root, "devices") devicePath, err := data.path("devices") if err != nil { t.Errorf("couldn't get cgroup path: %v", err) } if !strings.HasPrefix(devicePath, deviceRoot) { t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") } } // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. 
func TestInvalidCgroupName(t *testing.T) { root, err := getCgroupRoot() if err != nil { t.Errorf("couldn't get cgroup root: %v", err) } config := &configs.Cgroup{ Parent: "parent", Name: "../../../../../../../../../../some/path", } data, err := getCgroupData(config, 0) if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } // Make sure the final innerPath doesn't go outside the cgroup mountpoint. if strings.HasPrefix(data.innerPath, "..") { t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") } // Double-check, using an actual cgroup. deviceRoot := filepath.Join(root, "devices") devicePath, err := data.path("devices") if err != nil { t.Errorf("couldn't get cgroup path: %v", err) } if !strings.HasPrefix(devicePath, deviceRoot) { t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") } } // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. func TestInvalidAbsoluteCgroupName(t *testing.T) { root, err := getCgroupRoot() if err != nil { t.Errorf("couldn't get cgroup root: %v", err) } config := &configs.Cgroup{ Parent: "parent", Name: "/../../../../../../../../../../some/path", } data, err := getCgroupData(config, 0) if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } // Make sure the final innerPath doesn't go outside the cgroup mountpoint. if strings.HasPrefix(data.innerPath, "..") { t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") } // Double-check, using an actual cgroup. deviceRoot := filepath.Join(root, "devices") devicePath, err := data.path("devices") if err != nil { t.Errorf("couldn't get cgroup path: %v", err) } if !strings.HasPrefix(devicePath, deviceRoot) { t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") } } // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. 
func TestInvalidCgroupNameAndParent(t *testing.T) { root, err := getCgroupRoot() if err != nil { t.Errorf("couldn't get cgroup root: %v", err) } config := &configs.Cgroup{ Parent: "../../../../../../../../../../some/path", Name: "../../../../../../../../../../some/path", } data, err := getCgroupData(config, 0) if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } // Make sure the final innerPath doesn't go outside the cgroup mountpoint. if strings.HasPrefix(data.innerPath, "..") { t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") } // Double-check, using an actual cgroup. deviceRoot := filepath.Join(root, "devices") devicePath, err := data.path("devices") if err != nil { t.Errorf("couldn't get cgroup path: %v", err) } if !strings.HasPrefix(devicePath, deviceRoot) { t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") } } // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) { root, err := getCgroupRoot() if err != nil { t.Errorf("couldn't get cgroup root: %v", err) } config := &configs.Cgroup{ Parent: "/../../../../../../../../../../some/path", Name: "/../../../../../../../../../../some/path", } data, err := getCgroupData(config, 0) if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } // Make sure the final innerPath doesn't go outside the cgroup mountpoint. if strings.HasPrefix(data.innerPath, "..") { t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") } // Double-check, using an actual cgroup. 
deviceRoot := filepath.Join(root, "devices") devicePath, err := data.path("devices") if err != nil { t.Errorf("couldn't get cgroup path: %v", err) } if !strings.HasPrefix(devicePath, deviceRoot) { t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/blkio.go000066400000000000000000000134671304443252500244210ustar00rootroot00000000000000// +build linux package fs import ( "bufio" "fmt" "os" "path/filepath" "strconv" "strings" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type BlkioGroup struct { } func (s *BlkioGroup) Name() string { return "blkio" } func (s *BlkioGroup) Apply(d *cgroupData) error { _, err := d.join("blkio") if err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.Resources.BlkioWeight != 0 { if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil { return err } } if cgroup.Resources.BlkioLeafWeight != 0 { if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil { return err } } for _, wd := range cgroup.Resources.BlkioWeightDevice { if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { return err } if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { return err } } for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice { if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { return err } } for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice { if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { return err } } for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice { if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); 
err != nil { return err } } for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice { if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { return err } } return nil } func (s *BlkioGroup) Remove(d *cgroupData) error { return removePath(d.path("blkio")) } /* examples: blkio.sectors 8:0 6792 blkio.io_service_bytes 8:0 Read 1282048 8:0 Write 2195456 8:0 Sync 2195456 8:0 Async 1282048 8:0 Total 3477504 Total 3477504 blkio.io_serviced 8:0 Read 124 8:0 Write 104 8:0 Sync 104 8:0 Async 124 8:0 Total 228 Total 228 blkio.io_queued 8:0 Read 0 8:0 Write 0 8:0 Sync 0 8:0 Async 0 8:0 Total 0 Total 0 */ func splitBlkioStatLine(r rune) bool { return r == ' ' || r == ':' } func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) { var blkioStats []cgroups.BlkioStatEntry f, err := os.Open(path) if err != nil { if os.IsNotExist(err) { return blkioStats, nil } return nil, err } defer f.Close() sc := bufio.NewScanner(f) for sc.Scan() { // format: dev type amount fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) if len(fields) < 3 { if len(fields) == 2 && fields[0] == "Total" { // skip total line continue } else { return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) } } v, err := strconv.ParseUint(fields[0], 10, 64) if err != nil { return nil, err } major := v v, err = strconv.ParseUint(fields[1], 10, 64) if err != nil { return nil, err } minor := v op := "" valueField := 2 if len(fields) == 4 { op = fields[2] valueField = 3 } v, err = strconv.ParseUint(fields[valueField], 10, 64) if err != nil { return nil, err } blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) } return blkioStats, nil } func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { // Try to read CFQ stats available on all CFQ enabled kernels first if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != 
nil { return getCFQStats(path, stats) } return getStats(path, stats) // Use generic stats as fallback } func getCFQStats(path string, stats *cgroups.Stats) error { var blkioStats []cgroups.BlkioStatEntry var err error if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil { return err } stats.BlkioStats.SectorsRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil { return err } stats.BlkioStats.IoServiceBytesRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil { return err } stats.BlkioStats.IoServicedRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil { return err } stats.BlkioStats.IoQueuedRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil { return err } stats.BlkioStats.IoServiceTimeRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil { return err } stats.BlkioStats.IoWaitTimeRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil { return err } stats.BlkioStats.IoMergedRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil { return err } stats.BlkioStats.IoTimeRecursive = blkioStats return nil } func getStats(path string, stats *cgroups.Stats) error { var blkioStats []cgroups.BlkioStatEntry var err error if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil { return err } stats.BlkioStats.IoServiceBytesRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil { return err } stats.BlkioStats.IoServicedRecursive = blkioStats return nil } 
docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/blkio_test.go000066400000000000000000000527141304443252500254560ustar00rootroot00000000000000// +build linux package fs import ( "strconv" "testing" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) const ( sectorsRecursiveContents = `8:0 1024` serviceBytesRecursiveContents = `8:0 Read 100 8:0 Write 200 8:0 Sync 300 8:0 Async 500 8:0 Total 500 Total 500` servicedRecursiveContents = `8:0 Read 10 8:0 Write 40 8:0 Sync 20 8:0 Async 30 8:0 Total 50 Total 50` queuedRecursiveContents = `8:0 Read 1 8:0 Write 4 8:0 Sync 2 8:0 Async 3 8:0 Total 5 Total 5` serviceTimeRecursiveContents = `8:0 Read 173959 8:0 Write 0 8:0 Sync 0 8:0 Async 173959 8:0 Total 17395 Total 17395` waitTimeRecursiveContents = `8:0 Read 15571 8:0 Write 0 8:0 Sync 0 8:0 Async 15571 8:0 Total 15571` mergedRecursiveContents = `8:0 Read 5 8:0 Write 10 8:0 Sync 0 8:0 Async 0 8:0 Total 15 Total 15` timeRecursiveContents = `8:0 8` throttleServiceBytes = `8:0 Read 11030528 8:0 Write 23 8:0 Sync 42 8:0 Async 11030528 8:0 Total 11030528 252:0 Read 11030528 252:0 Write 23 252:0 Sync 42 252:0 Async 11030528 252:0 Total 11030528 Total 22061056` throttleServiced = `8:0 Read 164 8:0 Write 23 8:0 Sync 42 8:0 Async 164 8:0 Total 164 252:0 Read 164 252:0 Write 23 252:0 Sync 42 252:0 Async 164 252:0 Total 164 Total 328` ) func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) { *blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op}) } func TestBlkioSetWeight(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() const ( weightBefore = 100 weightAfter = 200 ) helper.writeFileContents(map[string]string{ "blkio.weight": strconv.Itoa(weightBefore), }) helper.CgroupData.config.Resources.BlkioWeight = weightAfter blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, 
helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "blkio.weight") if err != nil { t.Fatalf("Failed to parse blkio.weight - %s", err) } if value != weightAfter { t.Fatal("Got the wrong value, set blkio.weight failed.") } } func TestBlkioSetWeightDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() const ( weightDeviceBefore = "8:0 400" ) wd := configs.NewWeightDevice(8, 0, 500, 0) weightDeviceAfter := wd.WeightString() helper.writeFileContents(map[string]string{ "blkio.weight_device": weightDeviceBefore, }) helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device") if err != nil { t.Fatalf("Failed to parse blkio.weight_device - %s", err) } if value != weightDeviceAfter { t.Fatal("Got the wrong value, set blkio.weight_device failed.") } } // regression #274 func TestBlkioSetMultipleWeightDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() const ( weightDeviceBefore = "8:0 400" ) wd1 := configs.NewWeightDevice(8, 0, 500, 0) wd2 := configs.NewWeightDevice(8, 16, 500, 0) // we cannot actually set and check both because normal ioutil.WriteFile // when writing to cgroup file will overwrite the whole file content instead // of updating it as the kernel is doing. Just check the second device // is present will suffice for the test to ensure multiple writes are done. 
weightDeviceAfter := wd2.WeightString() helper.writeFileContents(map[string]string{ "blkio.weight_device": weightDeviceBefore, }) helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd1, wd2} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device") if err != nil { t.Fatalf("Failed to parse blkio.weight_device - %s", err) } if value != weightDeviceAfter { t.Fatal("Got the wrong value, set blkio.weight_device failed.") } } func TestBlkioStats(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } // Verify expected stats. 
expectedStats := cgroups.BlkioStats{} appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async") 
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total") appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "") expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) } func TestBlkioStatsNoSectorsFile(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoServiceBytesFile(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { 
t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoServicedFile(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoQueuedFile(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoServiceTimeFile(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, 
"blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoWaitTimeFile(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoMergedFile(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.time_recursive": timeRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoTimeFile(t *testing.T) { if testing.Short() { 
t.Skip("skipping test in short mode.") } helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": "8:0 Read 100 100", "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected to fail, but did not") } } func TestBlkioStatsUnexpectedFieldType(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": "8:0 Read Write", "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, 
"blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected to fail, but did not") } } func TestNonCFQBlkioStats(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": "", "blkio.io_serviced_recursive": "", "blkio.io_queued_recursive": "", "blkio.sectors_recursive": "", "blkio.io_service_time_recursive": "", "blkio.io_wait_time_recursive": "", "blkio.io_merged_recursive": "", "blkio.time_recursive": "", "blkio.throttle.io_service_bytes": throttleServiceBytes, "blkio.throttle.io_serviced": throttleServiced, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } // Verify expected stats. 
expectedStats := cgroups.BlkioStats{} appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total") expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) } func TestBlkioSetThrottleReadBpsDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() const ( throttleBefore = `8:0 1024` ) td := configs.NewThrottleDevice(8, 0, 2048) throttleAfter := td.String() helper.writeFileContents(map[string]string{ 
"blkio.throttle.read_bps_device": throttleBefore, }) helper.CgroupData.config.Resources.BlkioThrottleReadBpsDevice = []*configs.ThrottleDevice{td} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device") if err != nil { t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err) } if value != throttleAfter { t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.") } } func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() const ( throttleBefore = `8:0 1024` ) td := configs.NewThrottleDevice(8, 0, 2048) throttleAfter := td.String() helper.writeFileContents(map[string]string{ "blkio.throttle.write_bps_device": throttleBefore, }) helper.CgroupData.config.Resources.BlkioThrottleWriteBpsDevice = []*configs.ThrottleDevice{td} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device") if err != nil { t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err) } if value != throttleAfter { t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.") } } func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() const ( throttleBefore = `8:0 1024` ) td := configs.NewThrottleDevice(8, 0, 2048) throttleAfter := td.String() helper.writeFileContents(map[string]string{ "blkio.throttle.read_iops_device": throttleBefore, }) helper.CgroupData.config.Resources.BlkioThrottleReadIOPSDevice = []*configs.ThrottleDevice{td} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device") 
if err != nil { t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err) } if value != throttleAfter { t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.") } } func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() const ( throttleBefore = `8:0 1024` ) td := configs.NewThrottleDevice(8, 0, 2048) throttleAfter := td.String() helper.writeFileContents(map[string]string{ "blkio.throttle.write_iops_device": throttleBefore, }) helper.CgroupData.config.Resources.BlkioThrottleWriteIOPSDevice = []*configs.ThrottleDevice{td} blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device") if err != nil { t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err) } if value != throttleAfter { t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/cpu.go000066400000000000000000000060151304443252500240770ustar00rootroot00000000000000// +build linux package fs import ( "bufio" "os" "path/filepath" "strconv" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type CpuGroup struct { } func (s *CpuGroup) Name() string { return "cpu" } func (s *CpuGroup) Apply(d *cgroupData) error { // We always want to join the cpu group, to allow fair cpu scheduling // on a container basis path, err := d.path("cpu") if err != nil && !cgroups.IsNotFound(err) { return err } return s.ApplyDir(path, d.config, d.pid) } func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error { // This might happen if we have no cpu cgroup mounted. // Just do nothing and don't fail. 
if path == "" { return nil } if err := os.MkdirAll(path, 0755); err != nil { return err } // We should set the real-Time group scheduling settings before moving // in the process because if the process is already in SCHED_RR mode // and no RT bandwidth is set, adding it will fail. if err := s.SetRtSched(path, cgroup); err != nil { return err } // because we are not using d.join we need to place the pid into the procs file // unlike the other subsystems if err := cgroups.WriteCgroupProc(path, pid); err != nil { return err } return nil } func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error { if cgroup.Resources.CpuRtPeriod != 0 { if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil { return err } } if cgroup.Resources.CpuRtRuntime != 0 { if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil { return err } } return nil } func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.Resources.CpuShares != 0 { if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil { return err } } if cgroup.Resources.CpuPeriod != 0 { if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil { return err } } if cgroup.Resources.CpuQuota != 0 { if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil { return err } } if err := s.SetRtSched(path, cgroup); err != nil { return err } return nil } func (s *CpuGroup) Remove(d *cgroupData) error { return removePath(d.path("cpu")) } func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { f, err := os.Open(filepath.Join(path, "cpu.stat")) if err != nil { if os.IsNotExist(err) { return nil } return err } defer f.Close() sc := bufio.NewScanner(f) for sc.Scan() { t, v, err := getCgroupParamKeyValue(sc.Text()) if err != nil { return err } 
switch t { case "nr_periods": stats.CpuStats.ThrottlingData.Periods = v case "nr_throttled": stats.CpuStats.ThrottlingData.ThrottledPeriods = v case "throttled_time": stats.CpuStats.ThrottlingData.ThrottledTime = v } } return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/cpu_test.go000066400000000000000000000127331304443252500251420ustar00rootroot00000000000000// +build linux package fs import ( "fmt" "strconv" "testing" "github.com/opencontainers/runc/libcontainer/cgroups" ) func TestCpuSetShares(t *testing.T) { helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() const ( sharesBefore = 1024 sharesAfter = 512 ) helper.writeFileContents(map[string]string{ "cpu.shares": strconv.Itoa(sharesBefore), }) helper.CgroupData.config.Resources.CpuShares = sharesAfter cpu := &CpuGroup{} if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "cpu.shares") if err != nil { t.Fatalf("Failed to parse cpu.shares - %s", err) } if value != sharesAfter { t.Fatal("Got the wrong value, set cpu.shares failed.") } } func TestCpuSetBandWidth(t *testing.T) { helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() const ( quotaBefore = 8000 quotaAfter = 5000 periodBefore = 10000 periodAfter = 7000 rtRuntimeBefore = 8000 rtRuntimeAfter = 5000 rtPeriodBefore = 10000 rtPeriodAfter = 7000 ) helper.writeFileContents(map[string]string{ "cpu.cfs_quota_us": strconv.Itoa(quotaBefore), "cpu.cfs_period_us": strconv.Itoa(periodBefore), "cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore), "cpu.rt_period_us": strconv.Itoa(rtPeriodBefore), }) helper.CgroupData.config.Resources.CpuQuota = quotaAfter helper.CgroupData.config.Resources.CpuPeriod = periodAfter helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter cpu := &CpuGroup{} if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } 
quota, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us") if err != nil { t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err) } if quota != quotaAfter { t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.") } period, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us") if err != nil { t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err) } if period != periodAfter { t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.") } rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us") if err != nil { t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err) } if rtRuntime != rtRuntimeAfter { t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.") } rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us") if err != nil { t.Fatalf("Failed to parse cpu.rt_period_us - %s", err) } if rtPeriod != rtPeriodAfter { t.Fatal("Got the wrong value, set cpu.rt_period_us failed.") } } func TestCpuStats(t *testing.T) { helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() const ( nrPeriods = 2000 nrThrottled = 200 throttledTime = uint64(18446744073709551615) ) cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n", nrPeriods, nrThrottled, throttledTime) helper.writeFileContents(map[string]string{ "cpu.stat": cpuStatContent, }) cpu := &CpuGroup{} actualStats := *cgroups.NewStats() err := cpu.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } expectedStats := cgroups.ThrottlingData{ Periods: nrPeriods, ThrottledPeriods: nrThrottled, ThrottledTime: throttledTime} expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData) } func TestNoCpuStatFile(t *testing.T) { helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() cpu := &CpuGroup{} actualStats := *cgroups.NewStats() err := cpu.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal("Expected not to fail, but did") } } func TestInvalidCpuStat(t *testing.T) 
{ helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() cpuStatContent := `nr_periods 2000 nr_throttled 200 throttled_time fortytwo` helper.writeFileContents(map[string]string{ "cpu.stat": cpuStatContent, }) cpu := &CpuGroup{} actualStats := *cgroups.NewStats() err := cpu.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failed stat parsing.") } } func TestCpuSetRtSchedAtApply(t *testing.T) { helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() const ( rtRuntimeBefore = 0 rtRuntimeAfter = 5000 rtPeriodBefore = 0 rtPeriodAfter = 7000 ) helper.writeFileContents(map[string]string{ "cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore), "cpu.rt_period_us": strconv.Itoa(rtPeriodBefore), }) helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter cpu := &CpuGroup{} if err := cpu.ApplyDir(helper.CgroupPath, helper.CgroupData.config, 1234); err != nil { t.Fatal(err) } rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us") if err != nil { t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err) } if rtRuntime != rtRuntimeAfter { t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.") } rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us") if err != nil { t.Fatalf("Failed to parse cpu.rt_period_us - %s", err) } if rtPeriod != rtPeriodAfter { t.Fatal("Got the wrong value, set cpu.rt_period_us failed.") } pid, err := getCgroupParamUint(helper.CgroupPath, "cgroup.procs") if err != nil { t.Fatalf("Failed to parse cgroup.procs - %s", err) } if pid != 1234 { t.Fatal("Got the wrong value, set cgroup.procs failed.") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/cpuacct.go000066400000000000000000000061441304443252500247350ustar00rootroot00000000000000// +build linux package fs import ( "fmt" "io/ioutil" "path/filepath" "strconv" "strings" "github.com/opencontainers/runc/libcontainer/cgroups" 
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" ) const ( cgroupCpuacctStat = "cpuacct.stat" nanosecondsInSecond = 1000000000 ) var clockTicks = uint64(system.GetClockTicks()) type CpuacctGroup struct { } func (s *CpuacctGroup) Name() string { return "cpuacct" } func (s *CpuacctGroup) Apply(d *cgroupData) error { // we just want to join this group even though we don't set anything if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error { return nil } func (s *CpuacctGroup) Remove(d *cgroupData) error { return removePath(d.path("cpuacct")) } func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) if err != nil { return err } totalUsage, err := getCgroupParamUint(path, "cpuacct.usage") if err != nil { return err } percpuUsage, err := getPercpuUsage(path) if err != nil { return err } stats.CpuStats.CpuUsage.TotalUsage = totalUsage stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage return nil } // Returns user and kernel usage breakdown in nanoseconds. 
func getCpuUsageBreakdown(path string) (uint64, uint64, error) { userModeUsage := uint64(0) kernelModeUsage := uint64(0) const ( userField = "user" systemField = "system" ) // Expected format: // user // system data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat)) if err != nil { return 0, 0, err } fields := strings.Fields(string(data)) if len(fields) != 4 { return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat)) } if fields[0] != userField { return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField) } if fields[2] != systemField { return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField) } if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { return 0, 0, err } if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { return 0, 0, err } return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil } func getPercpuUsage(path string) ([]uint64, error) { percpuUsage := []uint64{} data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu")) if err != nil { return percpuUsage, err } for _, value := range strings.Fields(string(data)) { value, err := strconv.ParseUint(value, 10, 64) if err != nil { return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) } percpuUsage = append(percpuUsage, value) } return percpuUsage, nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/cpuset.go000066400000000000000000000066201304443252500246150ustar00rootroot00000000000000// +build linux package fs import ( "bytes" "fmt" "io/ioutil" "os" "path/filepath" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" ) type CpusetGroup struct { } func (s *CpusetGroup) 
Name() string { return "cpuset" } func (s *CpusetGroup) Apply(d *cgroupData) error { dir, err := d.path("cpuset") if err != nil && !cgroups.IsNotFound(err) { return err } return s.ApplyDir(dir, d.config, d.pid) } func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.Resources.CpusetCpus != "" { if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil { return err } } if cgroup.Resources.CpusetMems != "" { if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil { return err } } return nil } func (s *CpusetGroup) Remove(d *cgroupData) error { return removePath(d.path("cpuset")) } func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { // This might happen if we have no cpuset cgroup mounted. // Just do nothing and don't fail. if dir == "" { return nil } root, err := getCgroupRoot() if err != nil { return err } if err := s.ensureParent(dir, root); err != nil { return err } // because we are not using d.join we need to place the pid into the procs file // unlike the other subsystems if err := cgroups.WriteCgroupProc(dir, pid); err != nil { return err } return nil } func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { return } if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { return } return cpus, mems, nil } // ensureParent makes sure that the parent directory of current is created // and populated with the proper cpus and mems files copied from // it's parent. func (s *CpusetGroup) ensureParent(current, root string) error { parent := filepath.Dir(current) if libcontainerUtils.CleanPath(parent) == root { return nil } // Avoid infinite recursion. 
if parent == current { return fmt.Errorf("cpuset: cgroup parent path outside cgroup root") } if err := s.ensureParent(parent, root); err != nil { return err } if err := os.MkdirAll(current, 0755); err != nil { return err } return s.copyIfNeeded(current, parent) } // copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent // directory to the current directory if the file's contents are 0 func (s *CpusetGroup) copyIfNeeded(current, parent string) error { var ( err error currentCpus, currentMems []byte parentCpus, parentMems []byte ) if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { return err } if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { return err } if s.isEmpty(currentCpus) { if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { return err } } if s.isEmpty(currentMems) { if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { return err } } return nil } func (s *CpusetGroup) isEmpty(b []byte) bool { return len(bytes.Trim(b, "\n")) == 0 } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/cpuset_test.go000066400000000000000000000025171304443252500256550ustar00rootroot00000000000000// +build linux package fs import ( "testing" ) func TestCpusetSetCpus(t *testing.T) { helper := NewCgroupTestUtil("cpuset", t) defer helper.cleanup() const ( cpusBefore = "0" cpusAfter = "1-3" ) helper.writeFileContents(map[string]string{ "cpuset.cpus": cpusBefore, }) helper.CgroupData.config.Resources.CpusetCpus = cpusAfter cpuset := &CpusetGroup{} if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "cpuset.cpus") if err != nil { t.Fatalf("Failed to parse cpuset.cpus - %s", err) } if value != cpusAfter { t.Fatal("Got the wrong value, set cpuset.cpus failed.") } } func TestCpusetSetMems(t *testing.T) { helper := NewCgroupTestUtil("cpuset", t) defer helper.cleanup() 
const ( memsBefore = "0" memsAfter = "1" ) helper.writeFileContents(map[string]string{ "cpuset.mems": memsBefore, }) helper.CgroupData.config.Resources.CpusetMems = memsAfter cpuset := &CpusetGroup{} if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "cpuset.mems") if err != nil { t.Fatalf("Failed to parse cpuset.mems - %s", err) } if value != memsAfter { t.Fatal("Got the wrong value, set cpuset.mems failed.") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/devices.go000066400000000000000000000033211304443252500247270ustar00rootroot00000000000000// +build linux package fs import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" ) type DevicesGroup struct { } func (s *DevicesGroup) Name() string { return "devices" } func (s *DevicesGroup) Apply(d *cgroupData) error { _, err := d.join("devices") if err != nil { // We will return error even it's `not found` error, devices // cgroup is hard requirement for container's security. 
return err } return nil } func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { if system.RunningInUserNS() { return nil } devices := cgroup.Resources.Devices if len(devices) > 0 { for _, dev := range devices { file := "devices.deny" if dev.Allow { file = "devices.allow" } if err := writeFile(path, file, dev.CgroupString()); err != nil { return err } } return nil } if cgroup.Resources.AllowAllDevices != nil { if *cgroup.Resources.AllowAllDevices == false { if err := writeFile(path, "devices.deny", "a"); err != nil { return err } for _, dev := range cgroup.Resources.AllowedDevices { if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { return err } } return nil } if err := writeFile(path, "devices.allow", "a"); err != nil { return err } } for _, dev := range cgroup.Resources.DeniedDevices { if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil { return err } } return nil } func (s *DevicesGroup) Remove(d *cgroupData) error { return removePath(d.path("devices")) } func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/devices_test.go000066400000000000000000000046151304443252500257750ustar00rootroot00000000000000// +build linux package fs import ( "testing" "github.com/opencontainers/runc/libcontainer/configs" ) var ( allowedDevices = []*configs.Device{ { Path: "/dev/zero", Type: 'c', Major: 1, Minor: 5, Permissions: "rwm", FileMode: 0666, }, } allowedList = "c 1:5 rwm" deniedDevices = []*configs.Device{ { Path: "/dev/null", Type: 'c', Major: 1, Minor: 3, Permissions: "rwm", FileMode: 0666, }, } deniedList = "c 1:3 rwm" ) func TestDevicesSetAllow(t *testing.T) { helper := NewCgroupTestUtil("devices", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "devices.deny": "a", }) allowAllDevices := false helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices 
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices devices := &DevicesGroup{} if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "devices.allow") if err != nil { t.Fatalf("Failed to parse devices.allow - %s", err) } if value != allowedList { t.Fatal("Got the wrong value, set devices.allow failed.") } // When AllowAllDevices is nil, devices.allow file should not be modified. helper.CgroupData.config.Resources.AllowAllDevices = nil if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err = getCgroupParamString(helper.CgroupPath, "devices.allow") if err != nil { t.Fatalf("Failed to parse devices.allow - %s", err) } if value != allowedList { t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.") } } func TestDevicesSetDeny(t *testing.T) { helper := NewCgroupTestUtil("devices", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "devices.allow": "a", }) allowAllDevices := true helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices helper.CgroupData.config.Resources.DeniedDevices = deniedDevices devices := &DevicesGroup{} if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "devices.deny") if err != nil { t.Fatalf("Failed to parse devices.deny - %s", err) } if value != deniedList { t.Fatal("Got the wrong value, set devices.deny failed.") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/freezer.go000066400000000000000000000023401304443252500247470ustar00rootroot00000000000000// +build linux package fs import ( "fmt" "strings" "time" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type FreezerGroup struct { } func (s *FreezerGroup) Name() string { return "freezer" } func (s *FreezerGroup) 
Apply(d *cgroupData) error { _, err := d.join("freezer") if err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { switch cgroup.Resources.Freezer { case configs.Frozen, configs.Thawed: if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { return err } for { state, err := readFile(path, "freezer.state") if err != nil { return err } if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { break } time.Sleep(1 * time.Millisecond) } case configs.Undefined: return nil default: return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer)) } return nil } func (s *FreezerGroup) Remove(d *cgroupData) error { return removePath(d.path("freezer")) } func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/freezer_test.go000066400000000000000000000021761304443252500260150ustar00rootroot00000000000000// +build linux package fs import ( "testing" "github.com/opencontainers/runc/libcontainer/configs" ) func TestFreezerSetState(t *testing.T) { helper := NewCgroupTestUtil("freezer", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "freezer.state": string(configs.Frozen), }) helper.CgroupData.config.Resources.Freezer = configs.Thawed freezer := &FreezerGroup{} if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "freezer.state") if err != nil { t.Fatalf("Failed to parse freezer.state - %s", err) } if value != string(configs.Thawed) { t.Fatal("Got the wrong value, set freezer.state failed.") } } func TestFreezerSetInvalidState(t *testing.T) { helper := NewCgroupTestUtil("freezer", t) defer helper.cleanup() const ( invalidArg configs.FreezerState = "Invalid" ) helper.CgroupData.config.Resources.Freezer = invalidArg freezer := 
&FreezerGroup{} if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil { t.Fatal("Failed to return invalid argument error") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/fs_unsupported.go000066400000000000000000000000351304443252500263640ustar00rootroot00000000000000// +build !linux package fs docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/hugetlb.go000066400000000000000000000034031304443252500247400ustar00rootroot00000000000000// +build linux package fs import ( "fmt" "strconv" "strings" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type HugetlbGroup struct { } func (s *HugetlbGroup) Name() string { return "hugetlb" } func (s *HugetlbGroup) Apply(d *cgroupData) error { _, err := d.join("hugetlb") if err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { for _, hugetlb := range cgroup.Resources.HugetlbLimit { if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil { return err } } return nil } func (s *HugetlbGroup) Remove(d *cgroupData) error { return removePath(d.path("hugetlb")) } func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { hugetlbStats := cgroups.HugetlbStats{} for _, pageSize := range HugePageSizes { usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".") value, err := getCgroupParamUint(path, usage) if err != nil { return fmt.Errorf("failed to parse %s - %v", usage, err) } hugetlbStats.Usage = value maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".") value, err = getCgroupParamUint(path, maxUsage) if err != nil { return fmt.Errorf("failed to parse %s - %v", maxUsage, err) } hugetlbStats.MaxUsage = value failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".") value, err = 
getCgroupParamUint(path, failcnt) if err != nil { return fmt.Errorf("failed to parse %s - %v", failcnt, err) } hugetlbStats.Failcnt = value stats.HugetlbStats[pageSize] = hugetlbStats } return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/hugetlb_test.go000066400000000000000000000075031304443252500260040ustar00rootroot00000000000000// +build linux package fs import ( "fmt" "strconv" "testing" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) const ( hugetlbUsageContents = "128\n" hugetlbMaxUsageContents = "256\n" hugetlbFailcnt = "100\n" ) var ( usage = "hugetlb.%s.usage_in_bytes" limit = "hugetlb.%s.limit_in_bytes" maxUsage = "hugetlb.%s.max_usage_in_bytes" failcnt = "hugetlb.%s.failcnt" ) func TestHugetlbSetHugetlb(t *testing.T) { helper := NewCgroupTestUtil("hugetlb", t) defer helper.cleanup() const ( hugetlbBefore = 256 hugetlbAfter = 512 ) for _, pageSize := range HugePageSizes { helper.writeFileContents(map[string]string{ fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore), }) } for _, pageSize := range HugePageSizes { helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{ { Pagesize: pageSize, Limit: hugetlbAfter, }, } hugetlb := &HugetlbGroup{} if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } } for _, pageSize := range HugePageSizes { limit := fmt.Sprintf(limit, pageSize) value, err := getCgroupParamUint(helper.CgroupPath, limit) if err != nil { t.Fatalf("Failed to parse %s - %s", limit, err) } if value != hugetlbAfter { t.Fatalf("Set hugetlb.limit_in_bytes failed. 
Expected: %v, Got: %v", hugetlbAfter, value) } } } func TestHugetlbStats(t *testing.T) { helper := NewCgroupTestUtil("hugetlb", t) defer helper.cleanup() for _, pageSize := range HugePageSizes { helper.writeFileContents(map[string]string{ fmt.Sprintf(usage, pageSize): hugetlbUsageContents, fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents, fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt, }) } hugetlb := &HugetlbGroup{} actualStats := *cgroups.NewStats() err := hugetlb.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100} for _, pageSize := range HugePageSizes { expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize]) } } func TestHugetlbStatsNoUsageFile(t *testing.T) { helper := NewCgroupTestUtil("hugetlb", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ maxUsage: hugetlbMaxUsageContents, }) hugetlb := &HugetlbGroup{} actualStats := *cgroups.NewStats() err := hugetlb.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestHugetlbStatsNoMaxUsageFile(t *testing.T) { helper := NewCgroupTestUtil("hugetlb", t) defer helper.cleanup() for _, pageSize := range HugePageSizes { helper.writeFileContents(map[string]string{ fmt.Sprintf(usage, pageSize): hugetlbUsageContents, }) } hugetlb := &HugetlbGroup{} actualStats := *cgroups.NewStats() err := hugetlb.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestHugetlbStatsBadUsageFile(t *testing.T) { helper := NewCgroupTestUtil("hugetlb", t) defer helper.cleanup() for _, pageSize := range HugePageSizes { helper.writeFileContents(map[string]string{ fmt.Sprintf(usage, pageSize): "bad", maxUsage: hugetlbMaxUsageContents, }) } hugetlb := &HugetlbGroup{} actualStats := *cgroups.NewStats() err := hugetlb.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func 
TestHugetlbStatsBadMaxUsageFile(t *testing.T) { helper := NewCgroupTestUtil("hugetlb", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ usage: hugetlbUsageContents, maxUsage: "bad", }) hugetlb := &HugetlbGroup{} actualStats := *cgroups.NewStats() err := hugetlb.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/memory.go000066400000000000000000000207251304443252500246240ustar00rootroot00000000000000// +build linux package fs import ( "bufio" "fmt" "io/ioutil" "os" "path/filepath" "strconv" "strings" "syscall" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes" type MemoryGroup struct { } func (s *MemoryGroup) Name() string { return "memory" } func (s *MemoryGroup) Apply(d *cgroupData) (err error) { path, err := d.path("memory") if err != nil && !cgroups.IsNotFound(err) { return err } if memoryAssigned(d.config) { if path != "" { if err := os.MkdirAll(path, 0755); err != nil { return err } } if d.config.KernelMemory != 0 { if err := EnableKernelMemoryAccounting(path); err != nil { return err } } } defer func() { if err != nil { os.RemoveAll(path) } }() // We need to join memory cgroup after set memory limits, because // kmem.limit_in_bytes can only be set when the cgroup is empty. _, err = d.join("memory") if err != nil && !cgroups.IsNotFound(err) { return err } return nil } func EnableKernelMemoryAccounting(path string) error { // Check if kernel memory is enabled // We have to limit the kernel memory here as it won't be accounted at all // until a limit is set on the cgroup and limit cannot be set once the // cgroup has children, or if there are already tasks in the cgroup. 
for _, i := range []int64{1, -1} { if err := setKernelMemory(path, i); err != nil { return err } } return nil } func setKernelMemory(path string, kernelMemoryLimit int64) error { if path == "" { return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit) } if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) { // kernel memory is not enabled on the system so we should do nothing return nil } if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil { // Check if the error number returned by the syscall is "EBUSY" // The EBUSY signal is returned on attempts to write to the // memory.kmem.limit_in_bytes file if the cgroup has children or // once tasks have been attached to the cgroup if pathErr, ok := err.(*os.PathError); ok { if errNo, ok := pathErr.Err.(syscall.Errno); ok { if errNo == syscall.EBUSY { return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit) } } } return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err) } return nil } func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error { // When memory and swap memory are both set, we need to handle the cases // for updating container. if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 { memoryUsage, err := getMemoryData(path, "") if err != nil { return err } // When update memory limit, we should adapt the write sequence // for memory and swap memory, so it won't fail because the new // value and the old value don't fit kernel's validation. 
if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) { if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { return err } if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { return err } } else { if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { return err } if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { return err } } } else { if cgroup.Resources.Memory != 0 { if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { return err } } if cgroup.Resources.MemorySwap > 0 { if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { return err } } } return nil } func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { if err := setMemoryAndSwap(path, cgroup); err != nil { return err } if cgroup.Resources.KernelMemory != 0 { if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil { return err } } if cgroup.Resources.MemoryReservation != 0 { if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil { return err } } if cgroup.Resources.KernelMemoryTCP != 0 { if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil { return err } } if cgroup.Resources.OomKillDisable { if err := writeFile(path, "memory.oom_control", "1"); err != nil { return err } } if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 { return nil } else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 { if err := writeFile(path, "memory.swappiness", 
strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil { return err } } else { return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness)) } return nil } func (s *MemoryGroup) Remove(d *cgroupData) error { return removePath(d.path("memory")) } func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { // Set stats from memory.stat. statsFile, err := os.Open(filepath.Join(path, "memory.stat")) if err != nil { if os.IsNotExist(err) { return nil } return err } defer statsFile.Close() sc := bufio.NewScanner(statsFile) for sc.Scan() { t, v, err := getCgroupParamKeyValue(sc.Text()) if err != nil { return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) } stats.MemoryStats.Stats[t] = v } stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] memoryUsage, err := getMemoryData(path, "") if err != nil { return err } stats.MemoryStats.Usage = memoryUsage swapUsage, err := getMemoryData(path, "memsw") if err != nil { return err } stats.MemoryStats.SwapUsage = swapUsage kernelUsage, err := getMemoryData(path, "kmem") if err != nil { return err } stats.MemoryStats.KernelUsage = kernelUsage kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") if err != nil { return err } stats.MemoryStats.KernelTCPUsage = kernelTCPUsage return nil } func memoryAssigned(cgroup *configs.Cgroup) bool { return cgroup.Resources.Memory != 0 || cgroup.Resources.MemoryReservation != 0 || cgroup.Resources.MemorySwap > 0 || cgroup.Resources.KernelMemory > 0 || cgroup.Resources.KernelMemoryTCP > 0 || cgroup.Resources.OomKillDisable || (cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1) } func getMemoryData(path, name string) (cgroups.MemoryData, error) { memoryData := cgroups.MemoryData{} moduleName := "memory" if name != "" { moduleName = strings.Join([]string{"memory", name}, ".") } usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".") 
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".") failcnt := strings.Join([]string{moduleName, "failcnt"}, ".") limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".") value, err := getCgroupParamUint(path, usage) if err != nil { if moduleName != "memory" && os.IsNotExist(err) { return cgroups.MemoryData{}, nil } return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) } memoryData.Usage = value value, err = getCgroupParamUint(path, maxUsage) if err != nil { if moduleName != "memory" && os.IsNotExist(err) { return cgroups.MemoryData{}, nil } return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) } memoryData.MaxUsage = value value, err = getCgroupParamUint(path, failcnt) if err != nil { if moduleName != "memory" && os.IsNotExist(err) { return cgroups.MemoryData{}, nil } return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) } memoryData.Failcnt = value value, err = getCgroupParamUint(path, limit) if err != nil { if moduleName != "memory" && os.IsNotExist(err) { return cgroups.MemoryData{}, nil } return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err) } memoryData.Limit = value return memoryData, nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/memory_test.go000066400000000000000000000356651304443252500256740ustar00rootroot00000000000000// +build linux package fs import ( "strconv" "testing" "github.com/opencontainers/runc/libcontainer/cgroups" ) const ( memoryStatContents = `cache 512 rss 1024` memoryUsageContents = "2048\n" memoryMaxUsageContents = "4096\n" memoryFailcnt = "100\n" memoryLimitContents = "8192\n" ) func TestMemorySetMemory(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( memoryBefore = 314572800 // 300M memoryAfter = 524288000 // 500M reservationBefore = 209715200 // 200M reservationAfter = 314572800 // 300M ) helper.writeFileContents(map[string]string{ 
"memory.limit_in_bytes": strconv.Itoa(memoryBefore), "memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore), }) helper.CgroupData.config.Resources.Memory = memoryAfter helper.CgroupData.config.Resources.MemoryReservation = reservationAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) } if value != memoryAfter { t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") } value, err = getCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err) } if value != reservationAfter { t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.") } } func TestMemorySetMemoryswap(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( memoryswapBefore = 314572800 // 300M memoryswapAfter = 524288000 // 500M ) helper.writeFileContents(map[string]string{ "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), }) helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) } if value != memoryswapAfter { t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") } } func TestMemorySetNegativeMemoryswap(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( memoryBefore = 314572800 // 300M memoryAfter = 524288000 // 500M memoryswapBefore = 629145600 // 600M memoryswapAfter = 629145600 // 600M ) helper.writeFileContents(map[string]string{ "memory.limit_in_bytes": 
strconv.Itoa(memoryBefore), "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), }) helper.CgroupData.config.Resources.Memory = memoryAfter // Negative value means not change helper.CgroupData.config.Resources.MemorySwap = -1 memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) } if value != memoryAfter { t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") } value, err = getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) } if value != memoryswapAfter { t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") } } func TestMemorySetMemoryLargerThanSwap(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( memoryBefore = 314572800 // 300M memoryswapBefore = 524288000 // 500M memoryAfter = 629145600 // 600M memoryswapAfter = 838860800 // 800M ) helper.writeFileContents(map[string]string{ "memory.limit_in_bytes": strconv.Itoa(memoryBefore), "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), // Set will call getMemoryData when memory and swap memory are // both set, fake these fields so we don't get error. 
"memory.usage_in_bytes": "0", "memory.max_usage_in_bytes": "0", "memory.failcnt": "0", }) helper.CgroupData.config.Resources.Memory = memoryAfter helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) } if value != memoryAfter { t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") } value, err = getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) } if value != memoryswapAfter { t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") } } func TestMemorySetSwapSmallerThanMemory(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( memoryBefore = 629145600 // 600M memoryswapBefore = 838860800 // 800M memoryAfter = 314572800 // 300M memoryswapAfter = 524288000 // 500M ) helper.writeFileContents(map[string]string{ "memory.limit_in_bytes": strconv.Itoa(memoryBefore), "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), // Set will call getMemoryData when memory and swap memory are // both set, fake these fields so we don't get error. 
"memory.usage_in_bytes": "0", "memory.max_usage_in_bytes": "0", "memory.failcnt": "0", }) helper.CgroupData.config.Resources.Memory = memoryAfter helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) } if value != memoryAfter { t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") } value, err = getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) } if value != memoryswapAfter { t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") } } func TestMemorySetKernelMemory(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( kernelMemoryBefore = 314572800 // 300M kernelMemoryAfter = 524288000 // 500M ) helper.writeFileContents(map[string]string{ "memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore), }) helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err) } if value != kernelMemoryAfter { t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.") } } func TestMemorySetKernelMemoryTCP(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( kernelMemoryTCPBefore = 314572800 // 300M kernelMemoryTCPAfter = 524288000 // 500M ) helper.writeFileContents(map[string]string{ "memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore), }) helper.CgroupData.config.Resources.KernelMemoryTCP = 
kernelMemoryTCPAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err) } if value != kernelMemoryTCPAfter { t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.") } } func TestMemorySetMemorySwappinessDefault(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() swappinessBefore := 60 //default is 60 swappinessAfter := int64(0) helper.writeFileContents(map[string]string{ "memory.swappiness": strconv.Itoa(swappinessBefore), }) helper.CgroupData.config.Resources.MemorySwappiness = &swappinessAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.swappiness") if err != nil { t.Fatalf("Failed to parse memory.swappiness - %s", err) } if int64(value) != swappinessAfter { t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter) } } func TestMemoryStats(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": memoryUsageContents, "memory.limit_in_bytes": memoryLimitContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, "memory.failcnt": memoryFailcnt, "memory.memsw.usage_in_bytes": memoryUsageContents, "memory.memsw.max_usage_in_bytes": memoryMaxUsageContents, "memory.memsw.failcnt": memoryFailcnt, "memory.memsw.limit_in_bytes": memoryLimitContents, "memory.kmem.usage_in_bytes": memoryUsageContents, "memory.kmem.max_usage_in_bytes": memoryMaxUsageContents, "memory.kmem.failcnt": memoryFailcnt, "memory.kmem.limit_in_bytes": memoryLimitContents, }) memory := &MemoryGroup{} actualStats := 
*cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}} expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats) } func TestMemoryStatsNoStatFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, "memory.limit_in_bytes": memoryLimitContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } } func TestMemoryStatsNoUsageFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, "memory.limit_in_bytes": memoryLimitContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsNoMaxUsageFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": memoryUsageContents, "memory.limit_in_bytes": memoryLimitContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsNoLimitInBytesFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer 
helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsBadStatFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": "rss rss", "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, "memory.limit_in_bytes": memoryLimitContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsBadUsageFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": "bad", "memory.max_usage_in_bytes": memoryMaxUsageContents, "memory.limit_in_bytes": memoryLimitContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsBadMaxUsageFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": "bad", "memory.limit_in_bytes": memoryLimitContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsBadLimitInBytesFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ 
"memory.stat": memoryStatContents, "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, "memory.limit_in_bytes": "bad", }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemorySetOomControl(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( oomKillDisable = 1 // disable oom killer, default is 0 ) helper.writeFileContents(map[string]string{ "memory.oom_control": strconv.Itoa(oomKillDisable), }) memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control") if err != nil { t.Fatalf("Failed to parse memory.oom_control - %s", err) } if value != oomKillDisable { t.Fatalf("Got the wrong value, set memory.oom_control failed.") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/name.go000066400000000000000000000013101304443252500242210ustar00rootroot00000000000000// +build linux package fs import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type NameGroup struct { GroupName string Join bool } func (s *NameGroup) Name() string { return s.GroupName } func (s *NameGroup) Apply(d *cgroupData) error { if s.Join { // ignore errors if the named cgroup does not exist d.join(s.GroupName) } return nil } func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error { return nil } func (s *NameGroup) Remove(d *cgroupData) error { if s.Join { removePath(d.path(s.GroupName)) } return nil } func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/net_cls.go000066400000000000000000000015371304443252500247430ustar00rootroot00000000000000// +build linux package fs import ( "strconv" 
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type NetClsGroup struct { } func (s *NetClsGroup) Name() string { return "net_cls" } func (s *NetClsGroup) Apply(d *cgroupData) error { _, err := d.join("net_cls") if err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.Resources.NetClsClassid != 0 { if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil { return err } } return nil } func (s *NetClsGroup) Remove(d *cgroupData) error { return removePath(d.path("net_cls")) } func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/net_cls_test.go000066400000000000000000000016601304443252500257770ustar00rootroot00000000000000// +build linux package fs import ( "strconv" "testing" ) const ( classidBefore = 0x100002 classidAfter = 0x100001 ) func TestNetClsSetClassid(t *testing.T) { helper := NewCgroupTestUtil("net_cls", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "net_cls.classid": strconv.FormatUint(classidBefore, 10), }) helper.CgroupData.config.Resources.NetClsClassid = classidAfter netcls := &NetClsGroup{} if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } // As we are in mock environment, we can't get correct value of classid from // net_cls.classid. // So. 
we just judge if we successfully write classid into file value, err := getCgroupParamUint(helper.CgroupPath, "net_cls.classid") if err != nil { t.Fatalf("Failed to parse net_cls.classid - %s", err) } if value != classidAfter { t.Fatal("Got the wrong value, set net_cls.classid failed.") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/net_prio.go000066400000000000000000000015121304443252500251240ustar00rootroot00000000000000// +build linux package fs import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type NetPrioGroup struct { } func (s *NetPrioGroup) Name() string { return "net_prio" } func (s *NetPrioGroup) Apply(d *cgroupData) error { _, err := d.join("net_prio") if err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error { for _, prioMap := range cgroup.Resources.NetPrioIfpriomap { if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { return err } } return nil } func (s *NetPrioGroup) Remove(d *cgroupData) error { return removePath(d.path("net_prio")) } func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/net_prio_test.go000066400000000000000000000014161304443252500261660ustar00rootroot00000000000000// +build linux package fs import ( "strings" "testing" "github.com/opencontainers/runc/libcontainer/configs" ) var ( prioMap = []*configs.IfPrioMap{ { Interface: "test", Priority: 5, }, } ) func TestNetPrioSetIfPrio(t *testing.T) { helper := NewCgroupTestUtil("net_prio", t) defer helper.cleanup() helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap netPrio := &NetPrioGroup{} if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap") if err != nil { t.Fatalf("Failed to 
parse net_prio.ifpriomap - %s", err) } if !strings.Contains(value, "test 5") { t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/perf_event.go000066400000000000000000000013671304443252500254520ustar00rootroot00000000000000// +build linux package fs import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type PerfEventGroup struct { } func (s *PerfEventGroup) Name() string { return "perf_event" } func (s *PerfEventGroup) Apply(d *cgroupData) error { // we just want to join this group even though we don't set anything if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error { return nil } func (s *PerfEventGroup) Remove(d *cgroupData) error { return removePath(d.path("perf_event")) } func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/pids.go000066400000000000000000000031231304443252500242440ustar00rootroot00000000000000// +build linux package fs import ( "fmt" "path/filepath" "strconv" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type PidsGroup struct { } func (s *PidsGroup) Name() string { return "pids" } func (s *PidsGroup) Apply(d *cgroupData) error { _, err := d.join("pids") if err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.Resources.PidsLimit != 0 { // "max" is the fallback value. 
limit := "max" if cgroup.Resources.PidsLimit > 0 { limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10) } if err := writeFile(path, "pids.max", limit); err != nil { return err } } return nil } func (s *PidsGroup) Remove(d *cgroupData) error { return removePath(d.path("pids")) } func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error { current, err := getCgroupParamUint(path, "pids.current") if err != nil { return fmt.Errorf("failed to parse pids.current - %s", err) } maxString, err := getCgroupParamString(path, "pids.max") if err != nil { return fmt.Errorf("failed to parse pids.max - %s", err) } // Default if pids.max == "max" is 0 -- which represents "no limit". var max uint64 if maxString != "max" { max, err = parseUint(maxString, 10, 64) if err != nil { return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max")) } } stats.PidsStats.Current = current stats.PidsStats.Limit = max return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/pids_test.go000066400000000000000000000050641304443252500253110ustar00rootroot00000000000000// +build linux package fs import ( "strconv" "testing" "github.com/opencontainers/runc/libcontainer/cgroups" ) const ( maxUnlimited = -1 maxLimited = 1024 ) func TestPidsSetMax(t *testing.T) { helper := NewCgroupTestUtil("pids", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "pids.max": "max", }) helper.CgroupData.config.Resources.PidsLimit = maxLimited pids := &PidsGroup{} if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "pids.max") if err != nil { t.Fatalf("Failed to parse pids.max - %s", err) } if value != maxLimited { t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value) } } func TestPidsSetUnlimited(t *testing.T) { helper := NewCgroupTestUtil("pids", t) defer helper.cleanup() 
helper.writeFileContents(map[string]string{ "pids.max": strconv.Itoa(maxLimited), }) helper.CgroupData.config.Resources.PidsLimit = maxUnlimited pids := &PidsGroup{} if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "pids.max") if err != nil { t.Fatalf("Failed to parse pids.max - %s", err) } if value != "max" { t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value) } } func TestPidsStats(t *testing.T) { helper := NewCgroupTestUtil("pids", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "pids.current": strconv.Itoa(1337), "pids.max": strconv.Itoa(maxLimited), }) pids := &PidsGroup{} stats := *cgroups.NewStats() if err := pids.GetStats(helper.CgroupPath, &stats); err != nil { t.Fatal(err) } if stats.PidsStats.Current != 1337 { t.Fatalf("Expected %d, got %d for pids.current", 1337, stats.PidsStats.Current) } if stats.PidsStats.Limit != maxLimited { t.Fatalf("Expected %d, got %d for pids.max", maxLimited, stats.PidsStats.Limit) } } func TestPidsStatsUnlimited(t *testing.T) { helper := NewCgroupTestUtil("pids", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "pids.current": strconv.Itoa(4096), "pids.max": "max", }) pids := &PidsGroup{} stats := *cgroups.NewStats() if err := pids.GetStats(helper.CgroupPath, &stats); err != nil { t.Fatal(err) } if stats.PidsStats.Current != 4096 { t.Fatalf("Expected %d, got %d for pids.current", 4096, stats.PidsStats.Current) } if stats.PidsStats.Limit != 0 { t.Fatalf("Expected %d, got %d for pids.max", 0, stats.PidsStats.Limit) } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/stats_util_test.go000066400000000000000000000073551304443252500265520ustar00rootroot00000000000000// +build linux package fs import ( "fmt" "testing" "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer/cgroups" ) func blkioStatEntryEquals(expected, actual 
[]cgroups.BlkioStatEntry) error { if len(expected) != len(actual) { return fmt.Errorf("blkioStatEntries length do not match") } for i, expValue := range expected { actValue := actual[i] if expValue != actValue { return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue) } } return nil } func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) { if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil { logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil { logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil { logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil { logrus.Printf("blkio SectorsRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil { logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil { logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil { logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive) t.Fail() } if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil { logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err) t.Fail() } } func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) { if expected != 
actual { logrus.Printf("Expected throttling data %v but found %v\n", expected, actual) t.Fail() } } func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) { if expected != actual { logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual) t.Fail() } } func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) { expectMemoryDataEquals(t, expected.Usage, actual.Usage) expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage) expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage) for key, expValue := range expected.Stats { actValue, ok := actual.Stats[key] if !ok { logrus.Printf("Expected memory stat key %s not found\n", key) t.Fail() } if expValue != actValue { logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue) t.Fail() } } } func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) { if expected.Usage != actual.Usage { logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage) t.Fail() } if expected.MaxUsage != actual.MaxUsage { logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage) t.Fail() } if expected.Failcnt != actual.Failcnt { logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt) t.Fail() } if expected.Limit != actual.Limit { logrus.Printf("Expected memory limit %d but found %d\n", expected.Limit, actual.Limit) t.Fail() } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/util_test.go000066400000000000000000000027161304443252500253300ustar00rootroot00000000000000// +build linux /* Utility for testing cgroup operations. Creates a mock of the cgroup filesystem for the duration of the test. */ package fs import ( "io/ioutil" "os" "path/filepath" "testing" "github.com/opencontainers/runc/libcontainer/configs" ) type cgroupTestUtil struct { // cgroup data to use in tests. 
CgroupData *cgroupData // Path to the mock cgroup directory. CgroupPath string // Temporary directory to store mock cgroup filesystem. tempDir string t *testing.T } // Creates a new test util for the specified subsystem func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil { d := &cgroupData{ config: &configs.Cgroup{}, } d.config.Resources = &configs.Resources{} tempDir, err := ioutil.TempDir("", "cgroup_test") if err != nil { t.Fatal(err) } d.root = tempDir testCgroupPath := filepath.Join(d.root, subsystem) if err != nil { t.Fatal(err) } // Ensure the full mock cgroup path exists. err = os.MkdirAll(testCgroupPath, 0755) if err != nil { t.Fatal(err) } return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t} } func (c *cgroupTestUtil) cleanup() { os.RemoveAll(c.tempDir) } // Write the specified contents on the mock of the specified cgroup files. func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) { for file, contents := range fileContents { err := writeFile(c.CgroupPath, file, contents) if err != nil { c.t.Fatal(err) } } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/utils.go000066400000000000000000000040621304443252500244500ustar00rootroot00000000000000// +build linux package fs import ( "errors" "fmt" "io/ioutil" "path/filepath" "strconv" "strings" ) var ( ErrNotValidFormat = errors.New("line is not a valid key value format") ) // Saturates negative values at zero and returns a uint64. // Due to kernel bugs, some of the memory cgroup stats can be negative. func parseUint(s string, base, bitSize int) (uint64, error) { value, err := strconv.ParseUint(s, base, bitSize) if err != nil { intValue, intErr := strconv.ParseInt(s, base, bitSize) // 1. Handle negative values greater than MinInt64 (and) // 2. 
Handle negative values lesser than MinInt64 if intErr == nil && intValue < 0 { return 0, nil } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { return 0, nil } return value, err } return value, nil } // Parses a cgroup param and returns as name, value // i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234 func getCgroupParamKeyValue(t string) (string, uint64, error) { parts := strings.Fields(t) switch len(parts) { case 2: value, err := parseUint(parts[1], 10, 64) if err != nil { return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err) } return parts[0], value, nil default: return "", 0, ErrNotValidFormat } } // Gets a single uint64 value from the specified cgroup file. func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) { fileName := filepath.Join(cgroupPath, cgroupFile) contents, err := ioutil.ReadFile(fileName) if err != nil { return 0, err } res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) if err != nil { return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName) } return res, nil } // Gets a string value from the specified cgroup file func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) { contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) if err != nil { return "", err } return strings.TrimSpace(string(contents)), nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/fs/utils_test.go000066400000000000000000000040431304443252500255060ustar00rootroot00000000000000// +build linux package fs import ( "io/ioutil" "math" "os" "path/filepath" "strconv" "testing" ) const ( cgroupFile = "cgroup.file" floatValue = 2048.0 floatString = "2048" ) func TestGetCgroupParamsInt(t *testing.T) { // Setup tempdir. 
tempDir, err := ioutil.TempDir("", "cgroup_utils_test") if err != nil { t.Fatal(err) } defer os.RemoveAll(tempDir) tempFile := filepath.Join(tempDir, cgroupFile) // Success. err = ioutil.WriteFile(tempFile, []byte(floatString), 0755) if err != nil { t.Fatal(err) } value, err := getCgroupParamUint(tempDir, cgroupFile) if err != nil { t.Fatal(err) } else if value != floatValue { t.Fatalf("Expected %d to equal %f", value, floatValue) } // Success with new line. err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755) if err != nil { t.Fatal(err) } value, err = getCgroupParamUint(tempDir, cgroupFile) if err != nil { t.Fatal(err) } else if value != floatValue { t.Fatalf("Expected %d to equal %f", value, floatValue) } // Success with negative values err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755) if err != nil { t.Fatal(err) } value, err = getCgroupParamUint(tempDir, cgroupFile) if err != nil { t.Fatal(err) } else if value != 0 { t.Fatalf("Expected %d to equal %d", value, 0) } // Success with negative values lesser than min int64 s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64) err = ioutil.WriteFile(tempFile, []byte(s), 0755) if err != nil { t.Fatal(err) } value, err = getCgroupParamUint(tempDir, cgroupFile) if err != nil { t.Fatal(err) } else if value != 0 { t.Fatalf("Expected %d to equal %d", value, 0) } // Not a float. err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755) if err != nil { t.Fatal(err) } _, err = getCgroupParamUint(tempDir, cgroupFile) if err == nil { t.Fatal("Expecting error, got none") } // Unknown file. 
err = os.Remove(tempFile) if err != nil { t.Fatal(err) } _, err = getCgroupParamUint(tempDir, cgroupFile) if err == nil { t.Fatal("Expecting error, got none") } } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/stats.go000066400000000000000000000073011304443252500240350ustar00rootroot00000000000000// +build linux package cgroups type ThrottlingData struct { // Number of periods with throttling active Periods uint64 `json:"periods,omitempty"` // Number of periods when the container hit its throttling limit. ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` // Aggregate time the container was throttled for in nanoseconds. ThrottledTime uint64 `json:"throttled_time,omitempty"` } // CpuUsage denotes the usage of a CPU. // All CPU stats are aggregate since container inception. type CpuUsage struct { // Total CPU time consumed. // Units: nanoseconds. TotalUsage uint64 `json:"total_usage,omitempty"` // Total CPU time consumed per core. // Units: nanoseconds. PercpuUsage []uint64 `json:"percpu_usage,omitempty"` // Time spent by tasks of the cgroup in kernel mode. // Units: nanoseconds. UsageInKernelmode uint64 `json:"usage_in_kernelmode"` // Time spent by tasks of the cgroup in user mode. // Units: nanoseconds. 
UsageInUsermode uint64 `json:"usage_in_usermode"` } type CpuStats struct { CpuUsage CpuUsage `json:"cpu_usage,omitempty"` ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` } type MemoryData struct { Usage uint64 `json:"usage,omitempty"` MaxUsage uint64 `json:"max_usage,omitempty"` Failcnt uint64 `json:"failcnt"` Limit uint64 `json:"limit"` } type MemoryStats struct { // memory used for cache Cache uint64 `json:"cache,omitempty"` // usage of memory Usage MemoryData `json:"usage,omitempty"` // usage of memory + swap SwapUsage MemoryData `json:"swap_usage,omitempty"` // usage of kernel memory KernelUsage MemoryData `json:"kernel_usage,omitempty"` // usage of kernel TCP memory KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` Stats map[string]uint64 `json:"stats,omitempty"` } type PidsStats struct { // number of pids in the cgroup Current uint64 `json:"current,omitempty"` // active pids hard limit Limit uint64 `json:"limit,omitempty"` } type BlkioStatEntry struct { Major uint64 `json:"major,omitempty"` Minor uint64 `json:"minor,omitempty"` Op string `json:"op,omitempty"` Value uint64 `json:"value,omitempty"` } type BlkioStats struct { // number of bytes tranferred to and from the block device IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` } type HugetlbStats struct { // current res_counter usage for hugetlb Usage uint64 `json:"usage,omitempty"` // maximum usage ever recorded. 
MaxUsage uint64 `json:"max_usage,omitempty"` // number of times hugetlb usage allocation failure. Failcnt uint64 `json:"failcnt"` } type Stats struct { CpuStats CpuStats `json:"cpu_stats,omitempty"` MemoryStats MemoryStats `json:"memory_stats,omitempty"` PidsStats PidsStats `json:"pids_stats,omitempty"` BlkioStats BlkioStats `json:"blkio_stats,omitempty"` // the map is in the format "size of hugepage: stats of the hugepage" HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` } func NewStats() *Stats { memoryStats := MemoryStats{Stats: make(map[string]uint64)} hugetlbStats := make(map[string]HugetlbStats) return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/systemd/000077500000000000000000000000001304443252500240375ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/cgroups/systemd/apply_nosystemd.go000066400000000000000000000022011304443252500276130ustar00rootroot00000000000000// +build !linux package systemd import ( "fmt" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type Manager struct { Cgroups *configs.Cgroup Paths map[string]string } func UseSystemd() bool { return false } func (m *Manager) Apply(pid int) error { return fmt.Errorf("Systemd not supported") } func (m *Manager) GetPids() ([]int, error) { return nil, fmt.Errorf("Systemd not supported") } func (m *Manager) GetAllPids() ([]int, error) { return nil, fmt.Errorf("Systemd not supported") } func (m *Manager) Destroy() error { return fmt.Errorf("Systemd not supported") } func (m *Manager) GetPaths() map[string]string { return nil } func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, fmt.Errorf("Systemd not supported") } func (m *Manager) Set(container *configs.Config) error { return nil, fmt.Errorf("Systemd not supported") } func (m *Manager) Freeze(state configs.FreezerState) error { return fmt.Errorf("Systemd not 
supported") } func Freeze(c *configs.Cgroup, state configs.FreezerState) error { return fmt.Errorf("Systemd not supported") } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/systemd/apply_systemd.go000066400000000000000000000347071304443252500272760ustar00rootroot00000000000000// +build linux package systemd import ( "errors" "fmt" "io/ioutil" "os" "path/filepath" "strings" "sync" "time" systemdDbus "github.com/coreos/go-systemd/dbus" systemdUtil "github.com/coreos/go-systemd/util" "github.com/godbus/dbus" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/configs" ) type Manager struct { mu sync.Mutex Cgroups *configs.Cgroup Paths map[string]string } type subsystem interface { // Name returns the name of the subsystem. Name() string // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Set the cgroup represented by cgroup. 
Set(path string, cgroup *configs.Cgroup) error } var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") type subsystemSet []subsystem func (s subsystemSet) Get(name string) (subsystem, error) { for _, ss := range s { if ss.Name() == name { return ss, nil } } return nil, errSubsystemDoesNotExist } var subsystems = subsystemSet{ &fs.CpusetGroup{}, &fs.DevicesGroup{}, &fs.MemoryGroup{}, &fs.CpuGroup{}, &fs.CpuacctGroup{}, &fs.PidsGroup{}, &fs.BlkioGroup{}, &fs.HugetlbGroup{}, &fs.PerfEventGroup{}, &fs.FreezerGroup{}, &fs.NetPrioGroup{}, &fs.NetClsGroup{}, &fs.NameGroup{GroupName: "name=systemd"}, } const ( testScopeWait = 4 testSliceWait = 4 ) var ( connLock sync.Mutex theConn *systemdDbus.Conn hasStartTransientUnit bool hasStartTransientSliceUnit bool hasTransientDefaultDependencies bool hasDelegate bool ) func newProp(name string, units interface{}) systemdDbus.Property { return systemdDbus.Property{ Name: name, Value: dbus.MakeVariant(units), } } func UseSystemd() bool { if !systemdUtil.IsRunningSystemd() { return false } connLock.Lock() defer connLock.Unlock() if theConn == nil { var err error theConn, err = systemdDbus.New() if err != nil { return false } // Assume we have StartTransientUnit hasStartTransientUnit = true // But if we get UnknownMethod error we don't if _, err := theConn.StartTransientUnit("test.scope", "invalid", nil, nil); err != nil { if dbusError, ok := err.(dbus.Error); ok { if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" { hasStartTransientUnit = false return hasStartTransientUnit } } } // Ensure the scope name we use doesn't exist. Use the Pid to // avoid collisions between multiple libcontainer users on a // single host. 
// NOTE(review): this chunk opens mid-function. The lines below are the tail of
// a systemd capability probe whose signature lies outside this view: it starts
// and stops throw-away test units to detect support for DefaultDependencies=,
// Delegate= and transient slice units, records the results in the package-level
// has* flags, and finally returns hasStartTransientUnit.
		scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid())
		testScopeExists := true
		for i := 0; i <= testScopeWait; i++ {
			// A NoSuchUnit D-Bus error on StopUnit means the test scope is gone.
			if _, err := theConn.StopUnit(scope, "replace", nil); err != nil {
				if dbusError, ok := err.(dbus.Error); ok {
					if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
						testScopeExists = false
						break
					}
				}
			}
			time.Sleep(time.Millisecond)
		}

		// Bail out if we can't kill this scope without testing for DefaultDependencies
		if testScopeExists {
			return hasStartTransientUnit
		}

		// Assume StartTransientUnit on a scope allows DefaultDependencies
		hasTransientDefaultDependencies = true
		ddf := newProp("DefaultDependencies", false)
		if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{ddf}, nil); err != nil {
			if dbusError, ok := err.(dbus.Error); ok {
				if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
					hasTransientDefaultDependencies = false
				}
			}
		}

		// Not critical because of the stop unit logic above.
		theConn.StopUnit(scope, "replace", nil)

		// Assume StartTransientUnit on a scope allows Delegate
		hasDelegate = true
		dl := newProp("Delegate", true)
		if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil {
			if dbusError, ok := err.(dbus.Error); ok {
				if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
					hasDelegate = false
				}
			}
		}

		// Assume we have the ability to start a transient unit as a slice
		// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
		// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
		hasStartTransientSliceUnit = true

		// To ensure simple clean-up, we create a slice off the root with no hierarchy
		slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
		if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
			if _, ok := err.(dbus.Error); ok {
				hasStartTransientSliceUnit = false
			}
		}

		for i := 0; i <= testSliceWait; i++ {
			if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
				if dbusError, ok := err.(dbus.Error); ok {
					if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
						hasStartTransientSliceUnit = false
						break
					}
				}
			} else {
				break
			}
			time.Sleep(time.Millisecond)
		}

		// Not critical because of the stop unit logic above.
		theConn.StopUnit(scope, "replace", nil)
		theConn.StopUnit(slice, "replace", nil)
	}
	return hasStartTransientUnit
}

// Apply creates (or joins) the systemd unit backing the container and places
// pid into the kernel cgroup hierarchies underneath it.
//
// When c.Paths is set, no transient unit is created at all: the pid is entered
// directly into the supplied per-subsystem paths. A pid of -1 skips PID
// placement and is used for bare slice creation.
func (m *Manager) Apply(pid int) error {
	var (
		c          = m.Cgroups
		unitName   = getUnitName(c)
		slice      = "system.slice"
		properties []systemdDbus.Property
	)

	if c.Paths != nil {
		paths := make(map[string]string)
		for name, path := range c.Paths {
			_, err := getSubsystemPath(m.Cgroups, name)
			if err != nil {
				// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
				if cgroups.IsNotFound(err) {
					continue
				}
				return err
			}
			paths[name] = path
		}
		m.Paths = paths
		return cgroups.EnterPid(m.Paths, pid)
	}

	if c.Parent != "" {
		slice = c.Parent
	}

	properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))

	// if we create a slice, the parent is defined via a Wants=
	if strings.HasSuffix(unitName, ".slice") {
		// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
		if !hasStartTransientSliceUnit {
			return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
		}
		properties = append(properties, systemdDbus.PropWants(slice))
	} else {
		// otherwise, we use Slice=
		properties = append(properties, systemdDbus.PropSlice(slice))
	}

	// only add pid if it's valid, -1 is used w/ general slice creation.
	if pid != -1 {
		properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
	}

	if hasDelegate {
		// This is only supported on systemd versions 218 and above.
		properties = append(properties, newProp("Delegate", true))
	}

	// Always enable accounting, this gets us the same behaviour as the fs implementation,
	// plus the kernel has some problems with joining the memory cgroup at a later time.
	properties = append(properties,
		newProp("MemoryAccounting", true),
		newProp("CPUAccounting", true),
		newProp("BlockIOAccounting", true))

	if hasTransientDefaultDependencies {
		properties = append(properties, newProp("DefaultDependencies", false))
	}

	if c.Resources.Memory != 0 {
		properties = append(properties, newProp("MemoryLimit", uint64(c.Resources.Memory)))
	}

	if c.Resources.CpuShares != 0 {
		properties = append(properties, newProp("CPUShares", uint64(c.Resources.CpuShares)))
	}

	if c.Resources.BlkioWeight != 0 {
		properties = append(properties, newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
	}

	// We have to set kernel memory here, as we can't change it once
	// processes have been attached to the cgroup.
	if c.Resources.KernelMemory != 0 {
		if err := setKernelMemory(c); err != nil {
			return err
		}
	}

	// An already-existing unit is not an error: the container may be re-applied.
	if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil && !isUnitExists(err) {
		return err
	}

	if err := joinCgroups(c, pid); err != nil {
		return err
	}

	// Record the resolved per-subsystem paths for later Get*/Destroy calls.
	paths := make(map[string]string)
	for _, s := range subsystems {
		subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
		if err != nil {
			// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
			if cgroups.IsNotFound(err) {
				continue
			}
			return err
		}
		paths[s.Name()] = subsystemPath
	}
	m.Paths = paths
	return nil
}

// Destroy stops the container's systemd unit and removes the recorded cgroup
// paths. It is a no-op when explicit Cgroups.Paths are in use (we only joined,
// we do not own the hierarchy).
func (m *Manager) Destroy() error {
	if m.Cgroups.Paths != nil {
		return nil
	}
	m.mu.Lock()
	defer m.mu.Unlock()
	// StopUnit error is intentionally ignored; RemovePaths below is the
	// authoritative cleanup. NOTE(review): confirm this best-effort intent.
	theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
	if err := cgroups.RemovePaths(m.Paths); err != nil {
		return err
	}
	m.Paths = make(map[string]string)
	return nil
}

// GetPaths returns the subsystem-to-path map recorded by Apply.
func (m *Manager) GetPaths() map[string]string {
	m.mu.Lock()
	paths := m.Paths
	m.mu.Unlock()
	return paths
}

// writeFile writes data to dir/file, failing explicitly on an empty dir.
func writeFile(dir, file, data string) error {
	// Normally dir should not be empty, one case is that cgroup subsystem
	// is not mounted, we will get empty dir, and we want it fail here.
	if dir == "" {
		return fmt.Errorf("no such directory for %s", file)
	}
	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}

// join creates the cgroup directory for subsystem and moves pid into it,
// returning the created path.
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
	path, err := getSubsystemPath(c, subsystem)
	if err != nil {
		return "", err
	}
	if err := os.MkdirAll(path, 0755); err != nil {
		return "", err
	}
	if err := cgroups.WriteCgroupProc(path, pid); err != nil {
		return "", err
	}
	return path, nil
}

// joinCgroups places pid into every supported cgroup subsystem. name=systemd
// is left to systemd itself and cpuset needs special directory handling; for
// the rest, "not found" is fatal only for the devices cgroup.
func joinCgroups(c *configs.Cgroup, pid int) error {
	for _, sys := range subsystems {
		name := sys.Name()
		switch name {
		case "name=systemd":
			// let systemd handle this
			break // NOTE(review): break is redundant in a Go switch case; kept as-is.
		case "cpuset":
			path, err := getSubsystemPath(c, name)
			if err != nil && !cgroups.IsNotFound(err) {
				return err
			}
			s := &fs.CpusetGroup{}
			if err := s.ApplyDir(path, c, pid); err != nil {
				return err
			}
			break
		default:
			_, err := join(c, name, pid)
			if err != nil {
				// Even if it's `not found` error, we'll return err
				// because devices cgroup is hard requirement for
				// container security.
				if name == "devices" {
					return err
				}
				// For other subsystems, omit the `not found` error
				// because they are optional.
				if !cgroups.IsNotFound(err) {
					return err
				}
			}
		}
	}
	return nil
}

// systemd represents slice hierarchy using `-`, so we need to follow suit when
// generating the path of slice. Essentially, test-a-b.slice becomes
// test.slice/test-a.slice/test-a-b.slice.
func ExpandSlice(slice string) (string, error) {
	suffix := ".slice"
	// Name has to end with ".slice", but can't be just ".slice".
	if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	// Path-separators are not allowed.
	if strings.Contains(slice, "/") {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	var path, prefix string
	sliceName := strings.TrimSuffix(slice, suffix)
	// if input was -.slice, we should just return root now
	if sliceName == "-" {
		return "/", nil
	}
	for _, component := range strings.Split(sliceName, "-") {
		// test--a.slice isn't permitted, nor is -test.slice.
		if component == "" {
			return "", fmt.Errorf("invalid slice name: %s", slice)
		}

		// Append the component to the path and to the prefix.
		path += prefix + component + suffix + "/"
		prefix += component + "-"
	}
	return path, nil
}

// getSubsystemPath computes the absolute cgroup directory for the unit of c
// inside the given subsystem's mountpoint, anchored at the cgroup pid 1
// lives in.
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
	mountpoint, err := cgroups.FindCgroupMountpoint(subsystem)
	if err != nil {
		return "", err
	}

	initPath, err := cgroups.GetInitCgroupDir(subsystem)
	if err != nil {
		return "", err
	}
	// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
	initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")

	slice := "system.slice"
	if c.Parent != "" {
		slice = c.Parent
	}

	slice, err = ExpandSlice(slice)
	if err != nil {
		return "", err
	}

	return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
}

// Freeze sets the freezer cgroup to the requested state, restoring the
// previous configured state on failure.
func (m *Manager) Freeze(state configs.FreezerState) error {
	path, err := getSubsystemPath(m.Cgroups, "freezer")
	if err != nil {
		return err
	}
	prevState := m.Cgroups.Resources.Freezer
	m.Cgroups.Resources.Freezer = state
	freezer, err := subsystems.Get("freezer")
	if err != nil {
		return err
	}
	err = freezer.Set(path, m.Cgroups)
	if err != nil {
		m.Cgroups.Resources.Freezer = prevState
		return err
	}
	return nil
}

// GetPids returns the pids in the container's devices cgroup (non-recursive).
func (m *Manager) GetPids() ([]int, error) {
	path, err := getSubsystemPath(m.Cgroups, "devices")
	if err != nil {
		return nil, err
	}
	return cgroups.GetPids(path)
}

// GetAllPids returns the pids in the container's devices cgroup and all of
// its sub-cgroups.
func (m *Manager) GetAllPids() ([]int, error) {
	path, err := getSubsystemPath(m.Cgroups, "devices")
	if err != nil {
		return nil, err
	}
	return cgroups.GetAllPids(path)
}

// GetStats collects statistics from every recorded subsystem path, skipping
// unknown subsystems and paths that no longer exist.
func (m *Manager) GetStats() (*cgroups.Stats, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	stats := cgroups.NewStats()
	for name, path := range m.Paths {
		sys, err := subsystems.Get(name)
		if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
			continue
		}
		if err := sys.GetStats(path, stats); err != nil {
			return nil, err
		}
	}

	return stats, nil
}

// Set applies the resource configuration in container.Cgroups to every
// subsystem, then validates the configured cpu shares against the kernel.
func (m *Manager) Set(container *configs.Config) error {
	// If Paths are set, then we are just joining cgroups paths
	// and there is no need to set any values.
	if m.Cgroups.Paths != nil {
		return nil
	}
	for _, sys := range subsystems {
		// Get the subsystem path, but don't error out for not found cgroups.
		path, err := getSubsystemPath(container.Cgroups, sys.Name())
		if err != nil && !cgroups.IsNotFound(err) {
			return err
		}
		// NOTE(review): on a not-found error path is "" and is still passed
		// to Set — this relies on each subsystem tolerating an empty path;
		// confirm against the fs subsystem implementations.
		if err := sys.Set(path, container.Cgroups); err != nil {
			return err
		}
	}

	if m.Paths["cpu"] != "" {
		if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
			return err
		}
	}
	return nil
}

// getUnitName derives the systemd unit name for the cgroup config.
func getUnitName(c *configs.Cgroup) string {
	// by default, we create a scope unless the user explicitly asks for a slice.
	if !strings.HasSuffix(c.Name, ".slice") {
		return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
	}
	return c.Name
}

// setKernelMemory pre-creates the memory cgroup and enables kernel-memory
// accounting; per the caller's comment in Apply, this cannot be changed once
// processes have joined the cgroup.
func setKernelMemory(c *configs.Cgroup) error {
	path, err := getSubsystemPath(c, "memory")
	if err != nil && !cgroups.IsNotFound(err) {
		return err
	}

	if err := os.MkdirAll(path, 0755); err != nil {
		return err
	}
	return fs.EnableKernelMemoryAccounting(path)
}

// isUnitExists returns true if the error is that a systemd unit already exists.
func isUnitExists(err error) bool {
	if err != nil {
		if dbusError, ok := err.(dbus.Error); ok {
			return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
		}
	}
	return false
}
docker-runc-tags-docker-1.13.1/libcontainer/cgroups/utils.go000066400000000000000000000243431304443252500240440ustar00rootroot00000000000000// +build linux

package cgroups

import (
	"bufio"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/docker/go-units"
)

const (
	// Prefix used in /proc mount options for "named" cgroup hierarchies.
	cgroupNamePrefix = "name="
	// CgroupProcesses is the per-cgroup file listing member pids.
	CgroupProcesses = "cgroup.procs"
)

// FindCgroupMountpoint returns the mountpoint of the given cgroup subsystem,
// parsed directly from /proc/self/mountinfo.
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
	// We are not using mount.GetMounts() because it's super-inefficient,
	// parsing it directly sped up x10 times because of not using Sscanf.
	// It was one of two major performance drawbacks in container start.
	if !isSubsystemAvailable(subsystem) {
		return "", NewNotFoundError(subsystem)
	}
	f, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		txt := scanner.Text()
		fields := strings.Split(txt, " ")
		// The last field holds the mount options; the subsystem name appears
		// there, and field 5 (index 4) is the mountpoint.
		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
			if opt == subsystem {
				return fields[4], nil
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}

	return "", NewNotFoundError(subsystem)
}

// FindCgroupMountpointAndRoot returns both the mountpoint (mountinfo field 5)
// and the root of the mount (field 4) for the given subsystem.
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
	if !isSubsystemAvailable(subsystem) {
		return "", "", NewNotFoundError(subsystem)
	}
	f, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return "", "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		txt := scanner.Text()
		fields := strings.Split(txt, " ")
		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
			if opt == subsystem {
				return fields[4], fields[3], nil
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return "", "", err
	}

	return "", "", NewNotFoundError(subsystem)
}

// isSubsystemAvailable reports whether subsystem appears in /proc/self/cgroup
// (definition continues on the following source line of this chunk).
func 
isSubsystemAvailable(subsystem string) bool {
	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
	if err != nil {
		return false
	}
	_, avail := cgroups[subsystem]
	return avail
}

// FindCgroupMountpointDir returns the parent directory under which all cgroup
// hierarchies are mounted (e.g. /sys/fs/cgroup), by locating the first
// "cgroup" filesystem entry in /proc/self/mountinfo.
func FindCgroupMountpointDir() (string, error) {
	f, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		text := scanner.Text()
		fields := strings.Split(text, " ")
		// Safe as mountinfo encodes mountpoints with spaces as \040.
		index := strings.Index(text, " - ")
		postSeparatorFields := strings.Fields(text[index+3:])
		numPostFields := len(postSeparatorFields)

		// This is an error as we can't detect if the mount is for "cgroup"
		if numPostFields == 0 {
			return "", fmt.Errorf("Found no fields post '-' in %q", text)
		}

		if postSeparatorFields[0] == "cgroup" {
			// Check that the mount is properly formated.
			if numPostFields < 3 {
				return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
			}

			return filepath.Dir(fields[4]), nil
		}
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}

	return "", NewNotFoundError("cgroup")
}

// Mount describes one cgroup filesystem mount and the subsystems bound to it.
type Mount struct {
	Mountpoint string
	Root       string
	Subsystems []string
}

// GetThisCgroupDir resolves the relative cgroup path of the calling process
// for this mount's first subsystem, given a parsed /proc/self/cgroup map.
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
	if len(m.Subsystems) == 0 {
		return "", fmt.Errorf("no subsystem for mount")
	}

	return getControllerPath(m.Subsystems[0], cgroups)
}

// getCgroupMountsHelper scans mountinfo from mi and collects the cgroup
// mounts whose options match subsystems in ss. When all is false, scanning
// stops as soon as one mount per requested subsystem has been found.
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
	res := make([]Mount, 0, len(ss))
	scanner := bufio.NewScanner(mi)
	numFound := 0
	for scanner.Scan() && numFound < len(ss) {
		txt := scanner.Text()
		sepIdx := strings.Index(txt, " - ")
		if sepIdx == -1 {
			return nil, fmt.Errorf("invalid mountinfo format")
		}
		// Only consider filesystem type "cgroup" entries.
		if txt[sepIdx+3:sepIdx+9] != "cgroup" {
			continue
		}
		fields := strings.Split(txt, " ")
		m := Mount{
			Mountpoint: fields[4],
			Root:       fields[3],
		}
		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
			if !ss[opt] {
				continue
			}
			// Strip the "name=" prefix from named hierarchies.
			if strings.HasPrefix(opt, cgroupNamePrefix) {
				m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
			} else {
				m.Subsystems = append(m.Subsystems, opt)
			}
			if !all {
				numFound++
			}
		}
		res = append(res, m)
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return res, nil
}

// GetCgroupMounts returns the mounts for the cgroup subsystems.
// all indicates whether to return just the first instance or all the mounts.
func GetCgroupMounts(all bool) ([]Mount, error) {
	f, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return nil, err
	}
	defer f.Close()

	allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
	if err != nil {
		return nil, err
	}

	allMap := make(map[string]bool)
	for s := range allSubsystems {
		allMap[s] = true
	}
	return getCgroupMountsHelper(allMap, f, all)
}

// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
func GetAllSubsystems() ([]string, error) {
	f, err := os.Open("/proc/cgroups")
	if err != nil {
		return nil, err
	}
	defer f.Close()

	subsystems := []string{}

	s := bufio.NewScanner(f)
	for s.Scan() {
		if err := s.Err(); err != nil {
			return nil, err
		}
		text := s.Text()
		// Skip the '#subsys_name ...' header line; a subsystem is listed as
		// enabled when its 4th column is non-zero.
		// NOTE(review): text[0] would panic on an empty line — /proc/cgroups
		// is assumed never to contain one; confirm.
		if text[0] != '#' {
			parts := strings.Fields(text)
			if len(parts) >= 4 && parts[3] != "0" {
				subsystems = append(subsystems, parts[0])
			}
		}
	}
	return subsystems, nil
}

// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
	if err != nil {
		return "", err
	}

	return getControllerPath(subsystem, cgroups)
}

// GetInitCgroupDir returns the relative cgroup path of pid 1 for the given
// subsystem, parsed from /proc/1/cgroup.
func GetInitCgroupDir(subsystem string) (string, error) {
	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
	if err != nil {
		return "", err
	}

	return getControllerPath(subsystem, cgroups)
}

// readProcsFile reads dir's cgroup.procs file and returns the pids listed in
// it, skipping empty lines.
func readProcsFile(dir string) ([]int, error) {
	f, err := os.Open(filepath.Join(dir, CgroupProcesses))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var (
		s   = bufio.NewScanner(f)
		out = []int{}
	)

	for s.Scan() {
		if t := s.Text(); t != "" {
			pid, err := strconv.Atoi(t)
			if err != nil {
				return nil, err
			}
			out = append(out, pid)
		}
	}
	return out, nil
}

// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
func ParseCgroupFile(path string) (map[string]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	return parseCgroupFromReader(f)
}

// helper function for ParseCgroupFile to make testing easier
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
	s := bufio.NewScanner(r)
	cgroups := make(map[string]string)

	for s.Scan() {
		if err := s.Err(); err != nil {
			return nil, err
		}

		text := s.Text()
		// from cgroups(7):
		// /proc/[pid]/cgroup
		// ...
		// For each cgroup hierarchy ... there is one entry
		// containing three colon-separated fields of the form:
		//     hierarchy-ID:subsystem-list:cgroup-path
		parts := strings.SplitN(text, ":", 3)
		if len(parts) < 3 {
			return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
		}

		// A hierarchy may host several comma-separated subsystems; each maps
		// to the same cgroup path.
		for _, subs := range strings.Split(parts[1], ",") {
			cgroups[subs] = parts[2]
		}
	}

	return cgroups, nil
}

// getControllerPath looks up subsystem in a parsed cgroup map, also trying the
// "name=<subsystem>" form used by named hierarchies (e.g. name=systemd).
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
	if p, ok := cgroups[subsystem]; ok {
		return p, nil
	}

	if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
		return p, nil
	}

	return "", NewNotFoundError(subsystem)
}

// PathExists reports whether path can be stat'd.
func PathExists(path string) bool {
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}

// EnterPid writes pid into the cgroup.procs file of every existing path in
// cgroupPaths, joining the process to those cgroups.
func EnterPid(cgroupPaths map[string]string, pid int) error {
	for _, path := range cgroupPaths {
		if PathExists(path) {
			if err := WriteCgroupProc(path, pid); err != nil {
				return err
			}
		}
	}
	return nil
}

// RemovePaths iterates over the provided paths removing them.
// We trying to remove all paths five times with increasing delay between tries.
// If after all there are not removed cgroups - appropriate error will be
// returned.
func RemovePaths(paths map[string]string) (err error) {
	delay := 10 * time.Millisecond
	for i := 0; i < 5; i++ {
		// Exponential back-off between retries (10ms, 20ms, 40ms, 80ms).
		if i != 0 {
			time.Sleep(delay)
			delay *= 2
		}
		for s, p := range paths {
			os.RemoveAll(p)
			// TODO: here probably should be logging
			_, err := os.Stat(p)
			// We need this strange way of checking cgroups existence because
			// RemoveAll almost always returns error, even on already removed
			// cgroups
			if os.IsNotExist(err) {
				delete(paths, s)
			}
		}
		if len(paths) == 0 {
			return nil
		}
	}
	return fmt.Errorf("Failed to remove paths: %v", paths)
}

// GetHugePageSize returns the huge page sizes supported by the kernel,
// formatted as human-readable strings (e.g. "2MB"), derived from the
// directory names under /sys/kernel/mm/hugepages.
func GetHugePageSize() ([]string, error) {
	var pageSizes []string
	sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
	files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
	if err != nil {
		return pageSizes, err
	}
	for _, st := range files {
		// NOTE(review): assumes entries are named "hugepages-<size>"
		// (e.g. "hugepages-2048kB"); nameArray[1] would panic otherwise —
		// confirm against the kernel's sysfs layout.
		nameArray := strings.Split(st.Name(), "-")
		pageSize, err := units.RAMInBytes(nameArray[1])
		if err != nil {
			return []string{}, err
		}
		sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
		pageSizes = append(pageSizes, sizeString)
	}

	return pageSizes, nil
}

// GetPids returns all pids, that were added to cgroup at path.
func GetPids(path string) ([]int, error) {
	return readProcsFile(path)
}

// GetAllPids returns all pids, that were added to cgroup at path and to all its
// subcgroups.
func GetAllPids(path string) ([]int, error) {
	var pids []int
	// collect pids from all sub-cgroups
	err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
		dir, file := filepath.Split(p)
		if file != CgroupProcesses {
			return nil
		}
		if iErr != nil {
			return iErr
		}
		cPids, err := readProcsFile(dir)
		if err != nil {
			return err
		}
		pids = append(pids, cPids...)
		return nil
	})
	return pids, err
}

// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
func WriteCgroupProc(dir string, pid int) error {
	// Normally dir should not be empty, one case is that cgroup subsystem
	// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" { return fmt.Errorf("no such directory for %s", CgroupProcesses) } // Dont attach any pid to the cgroup if -1 is specified as a pid if pid != -1 { if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil { return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) } } return nil } docker-runc-tags-docker-1.13.1/libcontainer/cgroups/utils_test.go000066400000000000000000000422471304443252500251060ustar00rootroot00000000000000// +build linux package cgroups import ( "bytes" "fmt" "reflect" "strings" "testing" ) const fedoraMountinfo = `15 35 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw 16 35 0:14 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel 17 35 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8056484k,nr_inodes=2014121,mode=755 18 16 0:15 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw 19 16 0:13 / /sys/fs/selinux rw,relatime shared:8 - selinuxfs selinuxfs rw 20 17 0:16 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel 21 17 0:10 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000 22 35 0:17 / /run rw,nosuid,nodev shared:21 - tmpfs tmpfs rw,seclabel,mode=755 23 16 0:18 / /sys/fs/cgroup rw,nosuid,nodev,noexec shared:9 - tmpfs tmpfs rw,seclabel,mode=755 24 23 0:19 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 25 16 0:20 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw 26 23 0:21 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,cpuset,clone_children 27 23 0:22 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu,clone_children 28 23 0:23 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime 
shared:13 - cgroup cgroup rw,memory,clone_children 29 23 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,devices,clone_children 30 23 0:25 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,freezer,clone_children 31 23 0:26 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,net_cls,clone_children 32 23 0:27 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,blkio,clone_children 33 23 0:28 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,perf_event,clone_children 34 23 0:29 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,hugetlb,clone_children 35 1 253:2 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root--f20 rw,seclabel,data=ordered 36 15 0:30 / /proc/sys/fs/binfmt_misc rw,relatime shared:22 - autofs systemd-1 rw,fd=38,pgrp=1,timeout=300,minproto=5,maxproto=5,direct 37 17 0:12 / /dev/mqueue rw,relatime shared:23 - mqueue mqueue rw,seclabel 38 35 0:31 / /tmp rw shared:24 - tmpfs tmpfs rw,seclabel 39 17 0:32 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel 40 16 0:7 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw 41 16 0:33 / /sys/kernel/config rw,relatime shared:27 - configfs configfs rw 42 35 0:34 / /var/lib/nfs/rpc_pipefs rw,relatime shared:28 - rpc_pipefs sunrpc rw 43 15 0:35 / /proc/fs/nfsd rw,relatime shared:29 - nfsd sunrpc rw 45 35 8:17 / /boot rw,relatime shared:30 - ext4 /dev/sdb1 rw,seclabel,data=ordered 46 35 253:4 / /home rw,relatime shared:31 - ext4 /dev/mapper/ssd-home rw,seclabel,data=ordered 47 35 253:5 / /var/lib/libvirt/images rw,noatime,nodiratime shared:32 - ext4 /dev/mapper/ssd-virt rw,seclabel,discard,data=ordered 48 35 253:12 / /mnt/old rw,relatime shared:33 - ext4 /dev/mapper/HelpDeskRHEL6-FedoraRoot rw,seclabel,data=ordered 121 22 0:36 / /run/user/1000/gvfs rw,nosuid,nodev,relatime 
shared:104 - fuse.gvfsd-fuse gvfsd-fuse rw,user_id=1000,group_id=1000 124 16 0:37 / /sys/fs/fuse/connections rw,relatime shared:107 - fusectl fusectl rw 165 38 253:3 / /tmp/mnt rw,relatime shared:147 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered 167 35 253:15 / /var/lib/docker/devicemapper/mnt/aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,relatime shared:149 - ext4 /dev/mapper/docker-253:2-425882-aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,seclabel,discard,stripe=16,data=ordered 171 35 253:16 / /var/lib/docker/devicemapper/mnt/c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,relatime shared:153 - ext4 /dev/mapper/docker-253:2-425882-c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,seclabel,discard,stripe=16,data=ordered 175 35 253:17 / /var/lib/docker/devicemapper/mnt/1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,relatime shared:157 - ext4 /dev/mapper/docker-253:2-425882-1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,seclabel,discard,stripe=16,data=ordered 179 35 253:18 / /var/lib/docker/devicemapper/mnt/d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,relatime shared:161 - ext4 /dev/mapper/docker-253:2-425882-d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,seclabel,discard,stripe=16,data=ordered 183 35 253:19 / /var/lib/docker/devicemapper/mnt/6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,relatime shared:165 - ext4 /dev/mapper/docker-253:2-425882-6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,seclabel,discard,stripe=16,data=ordered 187 35 253:20 / /var/lib/docker/devicemapper/mnt/8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,relatime shared:169 - ext4 /dev/mapper/docker-253:2-425882-8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,seclabel,discard,stripe=16,data=ordered 191 35 253:21 / 
/var/lib/docker/devicemapper/mnt/c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,relatime shared:173 - ext4 /dev/mapper/docker-253:2-425882-c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,seclabel,discard,stripe=16,data=ordered 195 35 253:22 / /var/lib/docker/devicemapper/mnt/2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,relatime shared:177 - ext4 /dev/mapper/docker-253:2-425882-2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,seclabel,discard,stripe=16,data=ordered 199 35 253:23 / /var/lib/docker/devicemapper/mnt/37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,relatime shared:181 - ext4 /dev/mapper/docker-253:2-425882-37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,seclabel,discard,stripe=16,data=ordered 203 35 253:24 / /var/lib/docker/devicemapper/mnt/aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,relatime shared:185 - ext4 /dev/mapper/docker-253:2-425882-aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,seclabel,discard,stripe=16,data=ordered 207 35 253:25 / /var/lib/docker/devicemapper/mnt/928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,relatime shared:189 - ext4 /dev/mapper/docker-253:2-425882-928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,seclabel,discard,stripe=16,data=ordered 211 35 253:26 / /var/lib/docker/devicemapper/mnt/0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,relatime shared:193 - ext4 /dev/mapper/docker-253:2-425882-0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,seclabel,discard,stripe=16,data=ordered 215 35 253:27 / /var/lib/docker/devicemapper/mnt/d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,relatime shared:197 - ext4 /dev/mapper/docker-253:2-425882-d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,seclabel,discard,stripe=16,data=ordered 219 35 253:28 / 
/var/lib/docker/devicemapper/mnt/bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,relatime shared:201 - ext4 /dev/mapper/docker-253:2-425882-bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,seclabel,discard,stripe=16,data=ordered 223 35 253:29 / /var/lib/docker/devicemapper/mnt/7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,relatime shared:205 - ext4 /dev/mapper/docker-253:2-425882-7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,seclabel,discard,stripe=16,data=ordered 227 35 253:30 / /var/lib/docker/devicemapper/mnt/c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,relatime shared:209 - ext4 /dev/mapper/docker-253:2-425882-c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,seclabel,discard,stripe=16,data=ordered 231 35 253:31 / /var/lib/docker/devicemapper/mnt/8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,relatime shared:213 - ext4 /dev/mapper/docker-253:2-425882-8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,seclabel,discard,stripe=16,data=ordered 235 35 253:32 / /var/lib/docker/devicemapper/mnt/1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,relatime shared:217 - ext4 /dev/mapper/docker-253:2-425882-1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,seclabel,discard,stripe=16,data=ordered 239 35 253:33 / /var/lib/docker/devicemapper/mnt/e9aa60c60128cad1 rw,relatime shared:221 - ext4 /dev/mapper/docker-253:2-425882-e9aa60c60128cad1 rw,seclabel,discard,stripe=16,data=ordered 243 35 253:34 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,relatime shared:225 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,seclabel,discard,stripe=16,data=ordered 247 35 253:35 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d 
rw,relatime shared:229 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,seclabel,discard,stripe=16,data=ordered 31 21 0:23 / /DATA/foo_bla_bla rw,relatime - cifs //foo/BLA\040BLA\040BLA/ rw,sec=ntlm,cache=loose,unc=\\foo\BLA BLA BLA,username=my_login,domain=mydomain.com,uid=12345678,forceuid,gid=12345678,forcegid,addr=10.1.30.10,file_mode=0755,dir_mode=0755,nounix,rsize=61440,wsize=65536,actimeo=1` const systemdMountinfo = `115 83 0:32 / / rw,relatime - aufs none rw,si=c0bd3d3,dio,dirperm1 116 115 0:35 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw 117 115 0:36 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755 118 117 0:37 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666 119 115 0:38 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw 120 119 0:39 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755 121 120 0:19 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd 122 120 0:20 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices 123 120 0:21 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer 124 120 0:22 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory 125 120 0:23 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio 126 120 0:24 
/system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio 127 120 0:25 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children 128 120 0:26 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct 129 120 0:27 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event,release_agent=/run/cgmanager/agents/cgm-release-agent.perf_event 130 115 43:0 /var/lib/docker/volumes/a44a712176377f57c094397330ee04387284c478364eb25f4c3d25f775f25c26/_data /var/lib/docker rw,relatime - ext4 /dev/nbd0 rw,data=ordered 131 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/nbd0 rw,data=ordered 132 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hostname /etc/hostname rw,relatime - ext4 /dev/nbd0 rw,data=ordered 133 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hosts /etc/hosts rw,relatime - ext4 /dev/nbd0 rw,data=ordered 134 117 0:33 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k 135 117 0:13 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw 136 117 0:12 /1 /dev/console rw,nosuid,noexec,relatime - devpts none rw,gid=5,mode=620,ptmxmode=000 84 115 0:40 / /tmp rw,relatime - tmpfs none rw` func TestGetCgroupMounts(t *testing.T) { type testData struct { mountInfo string root string subsystems map[string]bool } testTable := []testData{ { mountInfo: fedoraMountinfo, 
root: "/", subsystems: map[string]bool{ "cpuset": true, "cpu": true, "cpuacct": true, "memory": true, "devices": true, "freezer": true, "net_cls": true, "blkio": true, "perf_event": true, "hugetlb": true, }, }, { mountInfo: systemdMountinfo, root: "/system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope", subsystems: map[string]bool{ "cpuset": true, "cpu": true, "cpuacct": true, "memory": true, "devices": true, "freezer": true, "net_cls": true, "blkio": true, "perf_event": true, }, }, } for _, td := range testTable { mi := bytes.NewBufferString(td.mountInfo) cgMounts, err := getCgroupMountsHelper(td.subsystems, mi, false) if err != nil { t.Fatal(err) } cgMap := make(map[string]Mount) for _, m := range cgMounts { for _, ss := range m.Subsystems { cgMap[ss] = m } } for ss := range td.subsystems { m, ok := cgMap[ss] if !ok { t.Fatalf("%s not found", ss) } if m.Root != td.root { t.Fatalf("unexpected root for %s: %s", ss, m.Root) } if !strings.HasPrefix(m.Mountpoint, "/sys/fs/cgroup/") && !strings.Contains(m.Mountpoint, ss) { t.Fatalf("unexpected mountpoint for %s: %s", ss, m.Mountpoint) } var ssFound bool for _, mss := range m.Subsystems { if mss == ss { ssFound = true break } } if !ssFound { t.Fatalf("subsystem %s not found in Subsystems field %v", ss, m.Subsystems) } } } } func BenchmarkGetCgroupMounts(b *testing.B) { subsystems := map[string]bool{ "cpuset": true, "cpu": true, "cpuacct": true, "memory": true, "devices": true, "freezer": true, "net_cls": true, "blkio": true, "perf_event": true, "hugetlb": true, } b.ResetTimer() for i := 0; i < b.N; i++ { b.StopTimer() mi := bytes.NewBufferString(fedoraMountinfo) b.StartTimer() if _, err := getCgroupMountsHelper(subsystems, mi, false); err != nil { b.Fatal(err) } } } func TestParseCgroupString(t *testing.T) { testCases := []struct { input string expectedError error expectedOutput map[string]string }{ { // Taken from a CoreOS instance running systemd 225 with CPU/Mem // accounting 
enabled in systemd input: `9:blkio:/ 8:freezer:/ 7:perf_event:/ 6:devices:/system.slice/system-sshd.slice 5:cpuset:/ 4:cpu,cpuacct:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service 3:net_cls,net_prio:/ 2:memory:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service 1:name=systemd:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service`, expectedOutput: map[string]string{ "name=systemd": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service", "blkio": "/", "freezer": "/", "perf_event": "/", "devices": "/system.slice/system-sshd.slice", "cpuset": "/", "cpu": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service", "cpuacct": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service", "net_cls": "/", "net_prio": "/", "memory": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service", }, }, { input: `malformed input`, expectedError: fmt.Errorf(`invalid cgroup entry: must contain at least two colons: malformed input`), }, } for ndx, testCase := range testCases { out, err := parseCgroupFromReader(strings.NewReader(testCase.input)) if err != nil { if testCase.expectedError == nil || testCase.expectedError.Error() != err.Error() { t.Errorf("%v: expected error %v, got error %v", ndx, testCase.expectedError, err) } } else { if !reflect.DeepEqual(testCase.expectedOutput, out) { t.Errorf("%v: expected output %v, got error %v", ndx, testCase.expectedOutput, out) } } } } docker-runc-tags-docker-1.13.1/libcontainer/compat_1.5_linux.go000066400000000000000000000003331304443252500243000ustar00rootroot00000000000000// +build linux,!go1.5 package libcontainer import "syscall" // GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building // with earlier versions func enableSetgroups(sys *syscall.SysProcAttr) { } 
docker-runc-tags-docker-1.13.1/libcontainer/configs/000077500000000000000000000000001304443252500223155ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/configs/blkio_device.go000066400000000000000000000036331304443252500252700ustar00rootroot00000000000000package configs import "fmt" // blockIODevice holds major:minor format supported in blkio cgroup type blockIODevice struct { // Major is the device's major number Major int64 `json:"major"` // Minor is the device's minor number Minor int64 `json:"minor"` } // WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair type WeightDevice struct { blockIODevice // Weight is the bandwidth rate for the device, range is from 10 to 1000 Weight uint16 `json:"weight"` // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only LeafWeight uint16 `json:"leafWeight"` } // NewWeightDevice returns a configured WeightDevice pointer func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice { wd := &WeightDevice{} wd.Major = major wd.Minor = minor wd.Weight = weight wd.LeafWeight = leafWeight return wd } // WeightString formats the struct to be writable to the cgroup specific file func (wd *WeightDevice) WeightString() string { return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) } // LeafWeightString formats the struct to be writable to the cgroup specific file func (wd *WeightDevice) LeafWeightString() string { return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) } // ThrottleDevice struct holds a `major:minor rate_per_second` pair type ThrottleDevice struct { blockIODevice // Rate is the IO rate limit per cgroup per device Rate uint64 `json:"rate"` } // NewThrottleDevice returns a configured ThrottleDevice pointer func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { td := &ThrottleDevice{} td.Major = major td.Minor = minor td.Rate = 
rate return td } // String formats the struct to be writable to the cgroup specific file func (td *ThrottleDevice) String() string { return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) } docker-runc-tags-docker-1.13.1/libcontainer/configs/cgroup_unix.go000066400000000000000000000077621304443252500252220ustar00rootroot00000000000000// +build linux freebsd package configs type FreezerState string const ( Undefined FreezerState = "" Frozen FreezerState = "FROZEN" Thawed FreezerState = "THAWED" ) type Cgroup struct { // Deprecated, use Path instead Name string `json:"name,omitempty"` // name of parent of cgroup or slice // Deprecated, use Path instead Parent string `json:"parent,omitempty"` // Path specifies the path to cgroups that are created and/or joined by the container. // The path is assumed to be relative to the host system cgroup mountpoint. Path string `json:"path"` // ScopePrefix describes prefix for the scope name ScopePrefix string `json:"scope_prefix"` // Paths represent the absolute cgroups paths to join. // This takes precedence over Path. Paths map[string]string // Resources contains various cgroups settings to apply *Resources } type Resources struct { // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. 
// Deprecated AllowAllDevices *bool `json:"allow_all_devices,omitempty"` // Deprecated AllowedDevices []*Device `json:"allowed_devices,omitempty"` // Deprecated DeniedDevices []*Device `json:"denied_devices,omitempty"` Devices []*Device `json:"devices"` // Memory limit (in bytes) Memory int64 `json:"memory"` // Memory reservation or soft_limit (in bytes) MemoryReservation int64 `json:"memory_reservation"` // Total memory usage (memory + swap); set `-1` to enable unlimited swap MemorySwap int64 `json:"memory_swap"` // Kernel memory limit (in bytes) KernelMemory int64 `json:"kernel_memory"` // Kernel memory limit for TCP use (in bytes) KernelMemoryTCP int64 `json:"kernel_memory_tcp"` // CPU shares (relative weight vs. other containers) CpuShares int64 `json:"cpu_shares"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. CpuQuota int64 `json:"cpu_quota"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. CpuPeriod int64 `json:"cpu_period"` // How many time CPU will use in realtime scheduling (in usecs). CpuRtRuntime int64 `json:"cpu_rt_quota"` // CPU period to be used for realtime scheduling (in usecs). CpuRtPeriod int64 `json:"cpu_rt_period"` // CPU to use CpusetCpus string `json:"cpuset_cpus"` // MEM to use CpusetMems string `json:"cpuset_mems"` // Process limit; set <= `0' to disable limit. PidsLimit int64 `json:"pids_limit"` // Specifies per cgroup weight, range is from 10 to 1000. BlkioWeight uint16 `json:"blkio_weight"` // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` // Weight per cgroup per device, can override BlkioWeight. BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` // IO read rate limit per cgroup per device, bytes per second. 
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` // IO write rate limit per cgroup per device, bytes per second. BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` // IO read rate limit per cgroup per device, IO per second. BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` // IO write rate limit per cgroup per device, IO per second. BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` // set the freeze value for the process Freezer FreezerState `json:"freezer"` // Hugetlb limit (in bytes) HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` // Whether to disable OOM Killer OomKillDisable bool `json:"oom_kill_disable"` // Tuning swappiness behaviour per cgroup MemorySwappiness *int64 `json:"memory_swappiness"` // Set priority of network traffic for container NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` // Set class identifier for container's network packets NetClsClassid uint32 `json:"net_cls_classid_u"` } docker-runc-tags-docker-1.13.1/libcontainer/configs/cgroup_unsupported.go000066400000000000000000000001141304443252500266070ustar00rootroot00000000000000// +build !windows,!linux,!freebsd package configs type Cgroup struct { } docker-runc-tags-docker-1.13.1/libcontainer/configs/cgroup_windows.go000066400000000000000000000002361304443252500257160ustar00rootroot00000000000000package configs // TODO Windows: This can ultimately be entirely factored out on Windows as // cgroups are a Unix-specific construct. 
type Cgroup struct { } docker-runc-tags-docker-1.13.1/libcontainer/configs/config.go000066400000000000000000000236561304443252500241250ustar00rootroot00000000000000package configs import ( "bytes" "encoding/json" "fmt" "os/exec" "time" "github.com/Sirupsen/logrus" ) type Rlimit struct { Type int `json:"type"` Hard uint64 `json:"hard"` Soft uint64 `json:"soft"` } // IDMap represents UID/GID Mappings for User Namespaces. type IDMap struct { ContainerID int `json:"container_id"` HostID int `json:"host_id"` Size int `json:"size"` } // Seccomp represents syscall restrictions // By default, only the native architecture of the kernel is allowed to be used // for syscalls. Additional architectures can be added by specifying them in // Architectures. type Seccomp struct { DefaultAction Action `json:"default_action"` Architectures []string `json:"architectures"` Syscalls []*Syscall `json:"syscalls"` } // Action is taken upon rule match in Seccomp type Action int const ( Kill Action = iota + 1 Errno Trap Allow Trace ) // Operator is a comparison operator to be used when matching syscall arguments in Seccomp type Operator int const ( EqualTo Operator = iota + 1 NotEqualTo GreaterThan GreaterThanOrEqualTo LessThan LessThanOrEqualTo MaskEqualTo ) // Arg is a rule to match a specific syscall argument in Seccomp type Arg struct { Index uint `json:"index"` Value uint64 `json:"value"` ValueTwo uint64 `json:"value_two"` Op Operator `json:"op"` } // Syscall is a rule to match a syscall in Seccomp type Syscall struct { Name string `json:"name"` Action Action `json:"action"` Args []*Arg `json:"args"` } // TODO Windows. Many of these fields should be factored out into those parts // which are common across platforms, and those which are platform specific. // Config defines configuration options for executing a process inside a contained environment. 
type Config struct { // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs // This is a common option when the container is running in ramdisk NoPivotRoot bool `json:"no_pivot_root"` // ParentDeathSignal specifies the signal that is sent to the container's process in the case // that the parent process dies. ParentDeathSignal int `json:"parent_death_signal"` // Path to a directory containing the container's root filesystem. Rootfs string `json:"rootfs"` // Readonlyfs will remount the container's rootfs as readonly where only externally mounted // bind mounts are writtable. Readonlyfs bool `json:"readonlyfs"` // Specifies the mount propagation flags to be applied to /. RootPropagation int `json:"rootPropagation"` // Mounts specify additional source and destination paths that will be mounted inside the container's // rootfs and mount namespace if specified Mounts []*Mount `json:"mounts"` // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! 
Devices []*Device `json:"devices"` MountLabel string `json:"mount_label"` // Hostname optionally sets the container's hostname if provided Hostname string `json:"hostname"` // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process Namespaces Namespaces `json:"namespaces"` // Capabilities specify the capabilities to keep when executing the process inside the container // All capbilities not specified will be dropped from the processes capability mask Capabilities []string `json:"capabilities"` // Networks specifies the container's network setup to be created Networks []*Network `json:"networks"` // Routes can be specified to create entries in the route table as the container is started Routes []*Route `json:"routes"` // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available Cgroups *Cgroup `json:"cgroups"` // AppArmorProfile specifies the profile to apply to the process running in the container and is // change at the time the process is execed AppArmorProfile string `json:"apparmor_profile,omitempty"` // ProcessLabel specifies the label to apply to the process running in the container. It is // commonly used by selinux ProcessLabel string `json:"process_label,omitempty"` // Rlimits specifies the resource limits, such as max open files, to set in the container // If Rlimits are not set, the container will inherit rlimits from the parent process Rlimits []Rlimit `json:"rlimits,omitempty"` // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores // for a process. Valid values are between the range [-1000, '1000'], where processes with // higher scores are preferred for being killed. 
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ OomScoreAdj int `json:"oom_score_adj"` // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings"` // GidMappings is an array of Group ID mappings for User Namespaces GidMappings []IDMap `json:"gid_mappings"` // MaskPaths specifies paths within the container's rootfs to mask over with a bind // mount pointing to /dev/null as to prevent reads of the file. MaskPaths []string `json:"mask_paths"` // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only // so that these files prevent any writes. ReadonlyPaths []string `json:"readonly_paths"` // Sysctl is a map of properties and their values. It is the equivalent of using // sysctl -w my.property.name value in Linux. Sysctl map[string]string `json:"sysctl"` // Seccomp allows actions to be taken whenever a syscall is made within the container. // A number of rules are given, each having an action to be taken if a syscall matches it. // A default action to be taken if no rules match is also given. Seccomp *Seccomp `json:"seccomp"` // NoNewPrivileges controls whether processes in the container can gain additional privileges. NoNewPrivileges bool `json:"no_new_privileges,omitempty"` // Hooks are a collection of actions to perform at various container lifecycle events. // CommandHooks are serialized to JSON, but other hooks are not. Hooks *Hooks // Version is the version of opencontainer specification that is supported. Version string `json:"version"` // Labels are user defined metadata that is stored in the config and populated on the state Labels []string `json:"labels"` // NoNewKeyring will not allocated a new session keyring for the container. It will use the // callers keyring in this case. 
NoNewKeyring bool `json:"no_new_keyring"` } type Hooks struct { // Prestart commands are executed after the container namespaces are created, // but before the user supplied command is executed from init. Prestart []Hook // Poststart commands are executed after the container init process starts. Poststart []Hook // Poststop commands are executed after the container init process exits. Poststop []Hook } func (hooks *Hooks) UnmarshalJSON(b []byte) error { var state struct { Prestart []CommandHook Poststart []CommandHook Poststop []CommandHook } if err := json.Unmarshal(b, &state); err != nil { return err } deserialize := func(shooks []CommandHook) (hooks []Hook) { for _, shook := range shooks { hooks = append(hooks, shook) } return hooks } hooks.Prestart = deserialize(state.Prestart) hooks.Poststart = deserialize(state.Poststart) hooks.Poststop = deserialize(state.Poststop) return nil } func (hooks Hooks) MarshalJSON() ([]byte, error) { serialize := func(hooks []Hook) (serializableHooks []CommandHook) { for _, hook := range hooks { switch chook := hook.(type) { case CommandHook: serializableHooks = append(serializableHooks, chook) default: logrus.Warnf("cannot serialize hook of type %T, skipping", hook) } } return serializableHooks } return json.Marshal(map[string]interface{}{ "prestart": serialize(hooks.Prestart), "poststart": serialize(hooks.Poststart), "poststop": serialize(hooks.Poststop), }) } // HookState is the payload provided to a hook on execution. type HookState struct { Version string `json:"ociVersion"` ID string `json:"id"` Pid int `json:"pid"` Root string `json:"root"` BundlePath string `json:"bundlePath"` } type Hook interface { // Run executes the hook with the provided state. Run(HookState) error } // NewFunctionHook will call the provided function when the hook is run. 
func NewFunctionHook(f func(HookState) error) FuncHook { return FuncHook{ run: f, } } type FuncHook struct { run func(HookState) error } func (f FuncHook) Run(s HookState) error { return f.run(s) } type Command struct { Path string `json:"path"` Args []string `json:"args"` Env []string `json:"env"` Dir string `json:"dir"` Timeout *time.Duration `json:"timeout"` } // NewCommandHook will execute the provided command when the hook is run. func NewCommandHook(cmd Command) CommandHook { return CommandHook{ Command: cmd, } } type CommandHook struct { Command } func (c Command) Run(s HookState) error { b, err := json.Marshal(s) if err != nil { return err } var stdout, stderr bytes.Buffer cmd := exec.Cmd{ Path: c.Path, Args: c.Args, Env: c.Env, Stdin: bytes.NewReader(b), Stdout: &stdout, Stderr: &stderr, } if err := cmd.Start(); err != nil { return err } errC := make(chan error, 1) go func() { err := cmd.Wait() if err != nil { err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) } errC <- err }() var timerCh <-chan time.Time if c.Timeout != nil { timer := time.NewTimer(*c.Timeout) defer timer.Stop() timerCh = timer.C } select { case err := <-errC: return err case <-timerCh: cmd.Process.Kill() cmd.Wait() return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds()) } } docker-runc-tags-docker-1.13.1/libcontainer/configs/config_test.go000066400000000000000000000103311304443252500251460ustar00rootroot00000000000000package configs_test import ( "encoding/json" "fmt" "os" "reflect" "testing" "time" "github.com/opencontainers/runc/libcontainer/configs" ) func TestUnmarshalHooks(t *testing.T) { timeout := time.Second prestartCmd := configs.NewCommandHook(configs.Command{ Path: "/var/vcap/hooks/prestart", Args: []string{"--pid=123"}, Env: []string{"FOO=BAR"}, Dir: "/var/vcap", Timeout: &timeout, }) prestart, err := json.Marshal(prestartCmd.Command) if err != nil { t.Fatal(err) } hook := configs.Hooks{} err = 
hook.UnmarshalJSON([]byte(fmt.Sprintf(`{"Prestart" :[%s]}`, prestart))) if err != nil { t.Fatal(err) } if !reflect.DeepEqual(hook.Prestart[0], prestartCmd) { t.Errorf("Expected prestart to equal %+v but it was %+v", prestartCmd, hook.Prestart[0]) } } func TestUnmarshalHooksWithInvalidData(t *testing.T) { hook := configs.Hooks{} err := hook.UnmarshalJSON([]byte(`{invalid-json}`)) if err == nil { t.Error("Expected error to occur but it was nil") } } func TestMarshalHooks(t *testing.T) { timeout := time.Second prestartCmd := configs.NewCommandHook(configs.Command{ Path: "/var/vcap/hooks/prestart", Args: []string{"--pid=123"}, Env: []string{"FOO=BAR"}, Dir: "/var/vcap", Timeout: &timeout, }) hook := configs.Hooks{ Prestart: []configs.Hook{prestartCmd}, } hooks, err := hook.MarshalJSON() if err != nil { t.Fatal(err) } h := `{"poststart":null,"poststop":null,"prestart":[{"path":"/var/vcap/hooks/prestart","args":["--pid=123"],"env":["FOO=BAR"],"dir":"/var/vcap","timeout":1000000000}]}` if string(hooks) != h { t.Errorf("Expected hooks %s to equal %s", string(hooks), h) } } func TestMarshalUnmarshalHooks(t *testing.T) { timeout := time.Second prestart := configs.NewCommandHook(configs.Command{ Path: "/var/vcap/hooks/prestart", Args: []string{"--pid=123"}, Env: []string{"FOO=BAR"}, Dir: "/var/vcap", Timeout: &timeout, }) hook := configs.Hooks{ Prestart: []configs.Hook{prestart}, } hooks, err := hook.MarshalJSON() if err != nil { t.Fatal(err) } umMhook := configs.Hooks{} err = umMhook.UnmarshalJSON(hooks) if err != nil { t.Fatal(err) } if !reflect.DeepEqual(umMhook.Prestart[0], prestart) { t.Errorf("Expected hooks to be equal after mashaling -> unmarshaling them: %+v, %+v", umMhook.Prestart[0], prestart) } } func TestMarshalHooksWithUnexpectedType(t *testing.T) { fHook := configs.NewFunctionHook(func(configs.HookState) error { return nil }) hook := configs.Hooks{ Prestart: []configs.Hook{fHook}, } hooks, err := hook.MarshalJSON() if err != nil { t.Fatal(err) } h := 
`{"poststart":null,"poststop":null,"prestart":null}` if string(hooks) != h { t.Errorf("Expected hooks %s to equal %s", string(hooks), h) } } func TestFuncHookRun(t *testing.T) { state := configs.HookState{ Version: "1", ID: "1", Pid: 1, Root: "root", } fHook := configs.NewFunctionHook(func(s configs.HookState) error { if !reflect.DeepEqual(state, s) { t.Errorf("Expected state %+v to equal %+v", state, s) } return nil }) fHook.Run(state) } func TestCommandHookRun(t *testing.T) { state := configs.HookState{ Version: "1", ID: "1", Pid: 1, Root: "root", } timeout := time.Second cmdHook := configs.NewCommandHook(configs.Command{ Path: os.Args[0], Args: []string{os.Args[0], "-test.run=TestHelperProcess"}, Env: []string{"FOO=BAR"}, Dir: "/", Timeout: &timeout, }) err := cmdHook.Run(state) if err != nil { t.Errorf(fmt.Sprintf("Expected error to not occur but it was %+v", err)) } } func TestCommandHookRunTimeout(t *testing.T) { state := configs.HookState{ Version: "1", ID: "1", Pid: 1, Root: "root", } timeout := (10 * time.Millisecond) cmdHook := configs.NewCommandHook(configs.Command{ Path: os.Args[0], Args: []string{os.Args[0], "-test.run=TestHelperProcessWithTimeout"}, Env: []string{"FOO=BAR"}, Dir: "/", Timeout: &timeout, }) err := cmdHook.Run(state) if err == nil { t.Error("Expected error to occur but it was nil") } } func TestHelperProcess(*testing.T) { fmt.Println("Helper Process") os.Exit(0) } func TestHelperProcessWithTimeout(*testing.T) { time.Sleep(time.Second) } docker-runc-tags-docker-1.13.1/libcontainer/configs/config_unix.go000066400000000000000000000027721304443252500251640ustar00rootroot00000000000000// +build freebsd linux package configs import "fmt" // HostUID gets the root uid for the process on host which could be non-zero // when user namespaces are enabled. 
func (c Config) HostUID() (int, error) { if c.Namespaces.Contains(NEWUSER) { if c.UidMappings == nil { return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") } id, found := c.hostIDFromMapping(0, c.UidMappings) if !found { return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") } return id, nil } // Return default root uid 0 return 0, nil } // HostGID gets the root gid for the process on host which could be non-zero // when user namespaces are enabled. func (c Config) HostGID() (int, error) { if c.Namespaces.Contains(NEWUSER) { if c.GidMappings == nil { return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") } id, found := c.hostIDFromMapping(0, c.GidMappings) if !found { return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.") } return id, nil } // Return default root gid 0 return 0, nil } // Utility function that gets a host ID for a container ID from user namespace map // if that ID is present in the map. 
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { for _, m := range uMap { if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { hostID := m.HostID + (containerID - m.ContainerID) return hostID, true } } return -1, false } docker-runc-tags-docker-1.13.1/libcontainer/configs/config_unix_test.go000066400000000000000000000046171304443252500262230ustar00rootroot00000000000000// +build linux freebsd package configs import ( "encoding/json" "fmt" "os" "path/filepath" "testing" ) func loadConfig(name string) (*Config, error) { f, err := os.Open(filepath.Join("../sample_configs", name)) if err != nil { return nil, err } defer f.Close() var container *Config if err := json.NewDecoder(f).Decode(&container); err != nil { return nil, err } // Check that a config doesn't contain extra fields var configMap, abstractMap map[string]interface{} if _, err := f.Seek(0, 0); err != nil { return nil, err } if err := json.NewDecoder(f).Decode(&abstractMap); err != nil { return nil, err } configData, err := json.Marshal(&container) if err != nil { return nil, err } if err := json.Unmarshal(configData, &configMap); err != nil { return nil, err } for k := range configMap { delete(abstractMap, k) } if len(abstractMap) != 0 { return nil, fmt.Errorf("unknown fields: %s", abstractMap) } return container, nil } func TestRemoveNamespace(t *testing.T) { ns := Namespaces{ {Type: NEWNET}, } if !ns.Remove(NEWNET) { t.Fatal("NEWNET was not removed") } if len(ns) != 0 { t.Fatalf("namespaces should have 0 items but reports %d", len(ns)) } } func TestHostUIDNoUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{}, } uid, err := config.HostUID() if err != nil { t.Fatal(err) } if uid != 0 { t.Fatalf("expected uid 0 with no USERNS but received %d", uid) } } func TestHostUIDWithUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{{Type: NEWUSER}}, UidMappings: []IDMap{ { ContainerID: 0, HostID: 1000, Size: 1, }, }, } uid, err 
:= config.HostUID() if err != nil { t.Fatal(err) } if uid != 1000 { t.Fatalf("expected uid 1000 with no USERNS but received %d", uid) } } func TestHostGIDNoUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{}, } uid, err := config.HostGID() if err != nil { t.Fatal(err) } if uid != 0 { t.Fatalf("expected gid 0 with no USERNS but received %d", uid) } } func TestHostGIDWithUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{{Type: NEWUSER}}, GidMappings: []IDMap{ { ContainerID: 0, HostID: 1000, Size: 1, }, }, } uid, err := config.HostGID() if err != nil { t.Fatal(err) } if uid != 1000 { t.Fatalf("expected gid 1000 with no USERNS but received %d", uid) } } docker-runc-tags-docker-1.13.1/libcontainer/configs/config_windows_test.go000066400000000000000000000001121304443252500267140ustar00rootroot00000000000000package configs // All current tests are for Unix-specific functionality docker-runc-tags-docker-1.13.1/libcontainer/configs/device.go000066400000000000000000000022511304443252500241030ustar00rootroot00000000000000package configs import ( "fmt" "os" ) const ( Wildcard = -1 ) // TODO Windows: This can be factored out in the future type Device struct { // Device type, block, char, etc. Type rune `json:"type"` // Path to the device. Path string `json:"path"` // Major is the device's major number. Major int64 `json:"major"` // Minor is the device's minor number. Minor int64 `json:"minor"` // Cgroup permissions format, rwm. Permissions string `json:"permissions"` // FileMode permission bits for the device. FileMode os.FileMode `json:"file_mode"` // Uid of the device. Uid uint32 `json:"uid"` // Gid of the device. 
Gid uint32 `json:"gid"` // Write the file to the allowed list Allow bool `json:"allow"` } func (d *Device) CgroupString() string { return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) } func (d *Device) Mkdev() int { return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) } // deviceNumberString converts the device number to a string return result. func deviceNumberString(number int64) string { if number == Wildcard { return "*" } return fmt.Sprint(number) } docker-runc-tags-docker-1.13.1/libcontainer/configs/device_defaults.go000066400000000000000000000037421304443252500260000ustar00rootroot00000000000000// +build linux freebsd package configs var ( // DefaultSimpleDevices are devices that are to be both allowed and created. DefaultSimpleDevices = []*Device{ // /dev/null and zero { Path: "/dev/null", Type: 'c', Major: 1, Minor: 3, Permissions: "rwm", FileMode: 0666, }, { Path: "/dev/zero", Type: 'c', Major: 1, Minor: 5, Permissions: "rwm", FileMode: 0666, }, { Path: "/dev/full", Type: 'c', Major: 1, Minor: 7, Permissions: "rwm", FileMode: 0666, }, // consoles and ttys { Path: "/dev/tty", Type: 'c', Major: 5, Minor: 0, Permissions: "rwm", FileMode: 0666, }, // /dev/urandom,/dev/random { Path: "/dev/urandom", Type: 'c', Major: 1, Minor: 9, Permissions: "rwm", FileMode: 0666, }, { Path: "/dev/random", Type: 'c', Major: 1, Minor: 8, Permissions: "rwm", FileMode: 0666, }, } DefaultAllowedDevices = append([]*Device{ // allow mknod for any device { Type: 'c', Major: Wildcard, Minor: Wildcard, Permissions: "m", }, { Type: 'b', Major: Wildcard, Minor: Wildcard, Permissions: "m", }, { Path: "/dev/console", Type: 'c', Major: 5, Minor: 1, Permissions: "rwm", }, // /dev/pts/ - pts namespaces are "coming soon" { Path: "", Type: 'c', Major: 136, Minor: Wildcard, Permissions: "rwm", }, { Path: "", Type: 'c', Major: 5, Minor: 2, Permissions: "rwm", }, // tuntap { Path: "", Type: 'c', Major: 10, 
Minor: 200, Permissions: "rwm", }, }, DefaultSimpleDevices...) DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...) ) docker-runc-tags-docker-1.13.1/libcontainer/configs/hugepage_limit.go000066400000000000000000000002641304443252500256310ustar00rootroot00000000000000package configs type HugepageLimit struct { // which type of hugepage to limit. Pagesize string `json:"page_size"` // usage limit for hugepage. Limit uint64 `json:"limit"` } docker-runc-tags-docker-1.13.1/libcontainer/configs/interface_priority_map.go000066400000000000000000000003541304443252500274040ustar00rootroot00000000000000package configs import ( "fmt" ) type IfPrioMap struct { Interface string `json:"interface"` Priority int64 `json:"priority"` } func (i *IfPrioMap) CgroupString() string { return fmt.Sprintf("%s %d", i.Interface, i.Priority) } docker-runc-tags-docker-1.13.1/libcontainer/configs/mount.go000066400000000000000000000017531304443252500240140ustar00rootroot00000000000000package configs const ( // EXT_COPYUP is a directive to copy up the contents of a directory when // a tmpfs is mounted over it. EXT_COPYUP = 1 << iota ) type Mount struct { // Source path for the mount. Source string `json:"source"` // Destination path for the mount inside the container. Destination string `json:"destination"` // Device the mount is for. Device string `json:"device"` // Mount flags. Flags int `json:"flags"` // Propagation Flags PropagationFlags []int `json:"propagation_flags"` // Mount data applied to the mount. Data string `json:"data"` // Relabel source if set, "z" indicates shared, "Z" indicates unshared. Relabel string `json:"relabel"` // Extensions are additional flags that are specific to runc. Extensions int `json:"extensions"` // Optional Command to be run before Source is mounted. PremountCmds []Command `json:"premount_cmds"` // Optional Command to be run after Source is mounted. 
PostmountCmds []Command `json:"postmount_cmds"` } docker-runc-tags-docker-1.13.1/libcontainer/configs/namespaces.go000066400000000000000000000001101304443252500247530ustar00rootroot00000000000000package configs type NamespaceType string type Namespaces []Namespace docker-runc-tags-docker-1.13.1/libcontainer/configs/namespaces_syscall.go000066400000000000000000000012641304443252500265200ustar00rootroot00000000000000// +build linux package configs import "syscall" func (n *Namespace) Syscall() int { return namespaceInfo[n.Type] } var namespaceInfo = map[NamespaceType]int{ NEWNET: syscall.CLONE_NEWNET, NEWNS: syscall.CLONE_NEWNS, NEWUSER: syscall.CLONE_NEWUSER, NEWIPC: syscall.CLONE_NEWIPC, NEWUTS: syscall.CLONE_NEWUTS, NEWPID: syscall.CLONE_NEWPID, } // CloneFlags parses the container's Namespaces options to set the correct // flags on clone, unshare. This function returns flags only for new namespaces. func (n *Namespaces) CloneFlags() uintptr { var flag int for _, v := range *n { if v.Path != "" { continue } flag |= namespaceInfo[v.Type] } return uintptr(flag) } docker-runc-tags-docker-1.13.1/libcontainer/configs/namespaces_syscall_unsupported.go000066400000000000000000000006101304443252500311620ustar00rootroot00000000000000// +build !linux,!windows package configs func (n *Namespace) Syscall() int { panic("No namespace syscall support") return 0 } // CloneFlags parses the container's Namespaces options to set the correct // flags on clone, unshare. This function returns flags only for new namespaces. 
func (n *Namespaces) CloneFlags() uintptr { panic("No namespace syscall support") return uintptr(0) } docker-runc-tags-docker-1.13.1/libcontainer/configs/namespaces_unix.go000066400000000000000000000045551304443252500260370ustar00rootroot00000000000000// +build linux freebsd package configs import ( "fmt" "os" "sync" ) const ( NEWNET NamespaceType = "NEWNET" NEWPID NamespaceType = "NEWPID" NEWNS NamespaceType = "NEWNS" NEWUTS NamespaceType = "NEWUTS" NEWIPC NamespaceType = "NEWIPC" NEWUSER NamespaceType = "NEWUSER" ) var ( nsLock sync.Mutex supportedNamespaces = make(map[NamespaceType]bool) ) // NsName converts the namespace type to its filename func NsName(ns NamespaceType) string { switch ns { case NEWNET: return "net" case NEWNS: return "mnt" case NEWPID: return "pid" case NEWIPC: return "ipc" case NEWUSER: return "user" case NEWUTS: return "uts" } return "" } // IsNamespaceSupported returns whether a namespace is available or // not func IsNamespaceSupported(ns NamespaceType) bool { nsLock.Lock() defer nsLock.Unlock() supported, ok := supportedNamespaces[ns] if ok { return supported } nsFile := NsName(ns) // if the namespace type is unknown, just return false if nsFile == "" { return false } _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile)) // a namespace is supported if it exists and we have permissions to read it supported = err == nil supportedNamespaces[ns] = supported return supported } func NamespaceTypes() []NamespaceType { return []NamespaceType{ NEWNET, NEWPID, NEWNS, NEWUTS, NEWIPC, NEWUSER, } } // Namespace defines configuration for each namespace. It specifies an // alternate path that is able to be joined via setns. 
type Namespace struct { Type NamespaceType `json:"type"` Path string `json:"path"` } func (n *Namespace) GetPath(pid int) string { if n.Path != "" { return n.Path } return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type)) } func (n *Namespaces) Remove(t NamespaceType) bool { i := n.index(t) if i == -1 { return false } *n = append((*n)[:i], (*n)[i+1:]...) return true } func (n *Namespaces) Add(t NamespaceType, path string) { i := n.index(t) if i == -1 { *n = append(*n, Namespace{Type: t, Path: path}) return } (*n)[i].Path = path } func (n *Namespaces) index(t NamespaceType) int { for i, ns := range *n { if ns.Type == t { return i } } return -1 } func (n *Namespaces) Contains(t NamespaceType) bool { return n.index(t) != -1 } func (n *Namespaces) PathOf(t NamespaceType) string { i := n.index(t) if i == -1 { return "" } return (*n)[i].Path } docker-runc-tags-docker-1.13.1/libcontainer/configs/namespaces_unsupported.go000066400000000000000000000003051304443252500274310ustar00rootroot00000000000000// +build !linux,!freebsd package configs // Namespace defines configuration for each namespace. It specifies an // alternate path that is able to be joined via setns. type Namespace struct { } docker-runc-tags-docker-1.13.1/libcontainer/configs/network.go000066400000000000000000000055141304443252500243420ustar00rootroot00000000000000package configs // Network defines configuration for a container's networking stack // // The network configuration can be omitted from a container causing the // container to be setup with the host's networking stack type Network struct { // Type sets the networks type, commonly veth and loopback Type string `json:"type"` // Name of the network interface Name string `json:"name"` // The bridge to use. 
Bridge string `json:"bridge"` // MacAddress contains the MAC address to set on the network interface MacAddress string `json:"mac_address"` // Address contains the IPv4 and mask to set on the network interface Address string `json:"address"` // Gateway sets the gateway address that is used as the default for the interface Gateway string `json:"gateway"` // IPv6Address contains the IPv6 and mask to set on the network interface IPv6Address string `json:"ipv6_address"` // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface IPv6Gateway string `json:"ipv6_gateway"` // Mtu sets the mtu value for the interface and will be mirrored on both the host and // container's interfaces if a pair is created, specifically in the case of type veth // Note: This does not apply to loopback interfaces. Mtu int `json:"mtu"` // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and // container's interfaces if a pair is created, specifically in the case of type veth // Note: This does not apply to loopback interfaces. TxQueueLen int `json:"txqueuelen"` // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the // container. HostInterfaceName string `json:"host_interface_name"` // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface // bridge port in the case of type veth // Note: This is unsupported on some systems. // Note: This does not apply to loopback interfaces. HairpinMode bool `json:"hairpin_mode"` } // Routes can be specified to create entries in the route table as the container is started // // All of destination, source, and gateway should be either IPv4 or IPv6. // One of the three options must be present, and omitted entries will use their // IP family default for the route table. 
// Routes can be specified to create entries in the route table as the container is started
//
// All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and omitted entries will use their
// IP family default for the route table. For IPv4 for example, setting the
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0(or *) when viewed in the route table.
type Route struct {
	// Destination specifies the destination and mask in CIDR notation. Accepts IPv4 and IPv6.
	Destination string `json:"destination"`

	// Source specifies the source and mask in CIDR notation. Accepts IPv4 and IPv6.
	Source string `json:"source"`

	// Gateway is the gateway address. Accepts IPv4 and IPv6.
	Gateway string `json:"gateway"`

	// InterfaceName names the device to set this route up for, for example: eth0.
	InterfaceName string `json:"interface_name"`
}
func (v *ConfigValidator) rootfs(config *configs.Config) error { if _, err := os.Stat(config.Rootfs); err != nil { if os.IsNotExist(err) { return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs) } return err } cleaned, err := filepath.Abs(config.Rootfs) if err != nil { return err } if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { return err } if filepath.Clean(config.Rootfs) != cleaned { return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) } return nil } func (v *ConfigValidator) network(config *configs.Config) error { if !config.Namespaces.Contains(configs.NEWNET) { if len(config.Networks) > 0 || len(config.Routes) > 0 { return fmt.Errorf("unable to apply network settings without a private NET namespace") } } return nil } func (v *ConfigValidator) hostname(config *configs.Config) error { if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { return fmt.Errorf("unable to set hostname without a private UTS namespace") } return nil } func (v *ConfigValidator) security(config *configs.Config) error { // restrict sys without mount namespace if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && !config.Namespaces.Contains(configs.NEWNS) { return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") } if config.ProcessLabel != "" && !selinux.SelinuxEnabled() { return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported") } return nil } func (v *ConfigValidator) usernamespace(config *configs.Config) error { if config.Namespaces.Contains(configs.NEWUSER) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { return fmt.Errorf("USER namespaces aren't enabled in the kernel") } } else { if config.UidMappings != nil || config.GidMappings != nil { return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config") } } return nil } // sysctl validates that the specified sysctl keys are valid or 
not. // /proc/sys isn't completely namespaced and depending on which namespaces // are specified, a subset of sysctls are permitted. func (v *ConfigValidator) sysctl(config *configs.Config) error { validSysctlMap := map[string]bool{ "kernel.msgmax": true, "kernel.msgmnb": true, "kernel.msgmni": true, "kernel.sem": true, "kernel.shmall": true, "kernel.shmmax": true, "kernel.shmmni": true, "kernel.shm_rmid_forced": true, } for s := range config.Sysctl { if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") { if config.Namespaces.Contains(configs.NEWIPC) { continue } else { return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s) } } if strings.HasPrefix(s, "net.") { if config.Namespaces.Contains(configs.NEWNET) { if path := config.Namespaces.PathOf(configs.NEWNET); path != "" { if err := checkHostNs(s, path); err != nil { return err } } continue } else { return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s) } } return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s) } return nil } // checkHostNs checks whether network sysctl is used in host namespace. 
// checkHostNs checks whether network sysctl is used in host namespace.
// It compares the target of the supplied namespace path against the
// current process's network namespace; identical targets mean the
// "container" namespace is really the host's, where net.* sysctls must
// not be set.
func checkHostNs(sysctlConfig string, path string) error {
	const currentProcessNetns = "/proc/self/ns/net"
	// readlink on the current processes network namespace
	destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
	if err != nil {
		// include the underlying error so callers can tell why the
		// readlink failed (permissions, missing /proc, ...); the original
		// message silently discarded err
		return fmt.Errorf("read soft link %q error: %v", currentProcessNetns, err)
	}
	// readlink on the path provided in the struct
	destOfContainer, err := os.Readlink(path)
	if err != nil {
		return fmt.Errorf("read soft link %q error: %v", path, err)
	}
	if destOfContainer == destOfCurrentProcess {
		return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
	}
	return nil
}
[]*configs.Route{route}, } validator := validate.New() err := validator.Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } } func TestValidateHostname(t *testing.T) { config := &configs.Config{ Rootfs: "/var", Hostname: "runc", Namespaces: configs.Namespaces( []configs.Namespace{ {Type: configs.NEWUTS}, }, ), } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } } func TestValidateHostnameWithoutUTSNamespace(t *testing.T) { config := &configs.Config{ Rootfs: "/var", Hostname: "runc", } validator := validate.New() err := validator.Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } } func TestValidateSecurityWithMaskPaths(t *testing.T) { config := &configs.Config{ Rootfs: "/var", MaskPaths: []string{"/proc/kcore"}, Namespaces: configs.Namespaces( []configs.Namespace{ {Type: configs.NEWNS}, }, ), } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } } func TestValidateSecurityWithROPaths(t *testing.T) { config := &configs.Config{ Rootfs: "/var", ReadonlyPaths: []string{"/proc/sys"}, Namespaces: configs.Namespaces( []configs.Namespace{ {Type: configs.NEWNS}, }, ), } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } } func TestValidateSecurityWithoutNEWNS(t *testing.T) { config := &configs.Config{ Rootfs: "/var", MaskPaths: []string{"/proc/kcore"}, ReadonlyPaths: []string{"/proc/sys"}, } validator := validate.New() err := validator.Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } } func TestValidateUsernamespace(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } config := &configs.Config{ Rootfs: "/var", Namespaces: configs.Namespaces( []configs.Namespace{ {Type: 
configs.NEWUSER}, }, ), } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("expected error to not occur %+v", err) } } func TestValidateUsernamespaceWithoutUserNS(t *testing.T) { uidMap := configs.IDMap{ContainerID: 123} config := &configs.Config{ Rootfs: "/var", UidMappings: []configs.IDMap{uidMap}, } validator := validate.New() err := validator.Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } } func TestValidateSysctl(t *testing.T) { sysctl := map[string]string{ "fs.mqueue.ctl": "ctl", "net.ctl": "ctl", "kernel.ctl": "ctl", } for k, v := range sysctl { config := &configs.Config{ Rootfs: "/var", Sysctl: map[string]string{k: v}, } validator := validate.New() err := validator.Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } } } func TestValidateValidSysctl(t *testing.T) { sysctl := map[string]string{ "fs.mqueue.ctl": "ctl", "net.ctl": "ctl", "kernel.msgmax": "ctl", } for k, v := range sysctl { config := &configs.Config{ Rootfs: "/var", Sysctl: map[string]string{k: v}, Namespaces: []configs.Namespace{ { Type: configs.NEWNET, }, { Type: configs.NEWIPC, }, }, } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("Expected error to not occur with {%s=%s} but got: %q", k, v, err) } } } func TestValidateSysctlWithSameNs(t *testing.T) { config := &configs.Config{ Rootfs: "/var", Sysctl: map[string]string{"net.ctl": "ctl"}, Namespaces: configs.Namespaces( []configs.Namespace{ { Type: configs.NEWNET, Path: "/proc/self/ns/net", }, }, ), } validator := validate.New() err := validator.Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } } func TestValidateSysctlWithoutNETNamespace(t *testing.T) { config := &configs.Config{ Rootfs: "/var", Sysctl: map[string]string{"net.ctl": "ctl"}, Namespaces: []configs.Namespace{}, } validator := validate.New() err := validator.Validate(config) if err == nil { 
t.Error("Expected error to occur but it was nil") } } docker-runc-tags-docker-1.13.1/libcontainer/console.go000066400000000000000000000004171304443252500226600ustar00rootroot00000000000000package libcontainer import "io" // Console represents a pseudo TTY. type Console interface { io.ReadWriter io.Closer // Path returns the filesystem path to the slave side of the pty. Path() string // Fd returns the fd for the master of the pty. Fd() uintptr } docker-runc-tags-docker-1.13.1/libcontainer/console_freebsd.go000066400000000000000000000006031304443252500243470ustar00rootroot00000000000000// +build freebsd package libcontainer import ( "errors" ) // NewConsole returns an initialized console that can be used within a container by copying bytes // from the master side to the slave that is attached as the tty for the container's init process. func NewConsole(uid, gid int) (Console, error) { return nil, errors.New("libcontainer console is not supported on FreeBSD") } docker-runc-tags-docker-1.13.1/libcontainer/console_linux.go000066400000000000000000000110601304443252500240730ustar00rootroot00000000000000package libcontainer import ( "fmt" "os" "path/filepath" "syscall" "unsafe" "github.com/opencontainers/runc/libcontainer/label" ) // NewConsole returns an initialized console that can be used within a container by copying bytes // from the master side to the slave that is attached as the tty for the container's init process. 
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
	master    *os.File
	slavePath string
}

// Fd returns the file descriptor of the master side of the pty.
func (c *linuxConsole) Fd() uintptr {
	return c.master.Fd()
}

// Path returns the filesystem path of the slave side of the pty.
func (c *linuxConsole) Path() string {
	return c.slavePath
}

// Read reads from the master side of the pty.
func (c *linuxConsole) Read(b []byte) (int, error) {
	return c.master.Read(b)
}

// Write writes to the master side of the pty.
func (c *linuxConsole) Write(b []byte) (int, error) {
	return c.master.Write(b)
}

// Close closes the master side; it is a no-op when no master is attached
// (e.g. consoles created from a path inside the container).
func (c *linuxConsole) Close() error {
	if c.master == nil {
		return nil
	}
	return c.master.Close()
}
// ioctl issues the ioctl(2) syscall on fd with the given request flag and
// argument, converting a nonzero errno into a Go error.
func ioctl(fd uintptr, flag, data uintptr) error {
	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data)
	if errno != 0 {
		return errno
	}
	return nil
}
// windowsConsole is a Windows pseudo TTY for use within a container.
// Every operation is a stub: there is no real pty backing it.
type windowsConsole struct {
}

// Fd always reports descriptor 0.
func (c *windowsConsole) Fd() uintptr {
	return 0
}

// Path reports an empty path, as no slave device exists.
func (c *windowsConsole) Path() string {
	return ""
}

// Read is a no-op that reports zero bytes read.
func (c *windowsConsole) Read(b []byte) (int, error) {
	return 0, nil
}

// Write is a no-op that reports zero bytes written.
func (c *windowsConsole) Write(b []byte) (int, error) {
	return 0, nil
}

// Close is a no-op that always succeeds.
func (c *windowsConsole) Close() error {
	return nil
}
// Status is the status of a container.
type Status int

const (
	// Created is the status that denotes the container exists but has not been run yet.
	Created Status = iota
	// Running is the status that denotes the container exists and is running.
	Running
	// Pausing is the status that denotes the container exists, it is in the process of being paused.
	Pausing
	// Paused is the status that denotes the container exists, but all its processes are paused.
	Paused
	// Stopped is the status that denotes the container does not have a created or running process.
	Stopped
)

// statusNames maps each defined Status to its lowercase string form.
var statusNames = map[Status]string{
	Created: "created",
	Running: "running",
	Pausing: "pausing",
	Paused:  "paused",
	Stopped: "stopped",
}

// String returns the lowercase name of the status, or "unknown" for
// values outside the defined set.
func (s Status) String() string {
	if name, ok := statusNames[s]; ok {
		return name
	}
	return "unknown"
}
// // errors: // SystemError - System error. State() (*State, error) // Returns the current config of the container. Config() configs.Config // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. // // errors: // ContainerNotExists - Container no longer exists, // Systemerror - System error. // // Some of the returned PIDs may no longer refer to processes in the Container, unless // the Container state is PAUSED in which case every PID in the slice is valid. Processes() ([]int, error) // Returns statistics for the container. // // errors: // ContainerNotExists - Container no longer exists, // Systemerror - System error. Stats() (*Stats, error) // Set resources of container as configured // // We can use this to change resources when containers are running. // // errors: // SystemError - System error. Set(config configs.Config) error // Start a process inside the container. Returns error if process fails to // start. You can track process lifecycle with passed Process structure. // // errors: // ContainerNotExists - Container no longer exists, // ConfigInvalid - config is invalid, // ContainerPaused - Container is paused, // SystemError - System error. Start(process *Process) (err error) // Run immediately starts the process inside the container. Returns error if process // fails to start. It does not block waiting for the exec fifo after start returns but // opens the fifo after start returns. // // errors: // ContainerNotExists - Container no longer exists, // ConfigInvalid - config is invalid, // ContainerPaused - Container is paused, // SystemError - System error. Run(process *Process) (err error) // Destroys the container after killing all running processes. // // Any event registrations are removed before the container is destroyed. // No error is returned if the container is already destroyed. // // errors: // SystemError - System error. 
Destroy() error // Signal sends the provided signal code to the container's initial process. // // If all is specified the signal is sent to all processes in the container // including the initial process. // // errors: // SystemError - System error. Signal(s os.Signal, all bool) error // Exec signals the container to exec the users process at the end of the init. // // errors: // SystemError - System error. Exec() error } docker-runc-tags-docker-1.13.1/libcontainer/container_linux.go000066400000000000000000001103431304443252500244170ustar00rootroot00000000000000// +build linux package libcontainer import ( "bytes" "encoding/json" "fmt" "io" "io/ioutil" "os" "os/exec" "path/filepath" "reflect" "strings" "sync" "syscall" "time" "github.com/Sirupsen/logrus" "github.com/golang/protobuf/proto" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/criurpc" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" "github.com/syndtr/gocapability/capability" "github.com/vishvananda/netlink/nl" ) const stdioFdCount = 3 type linuxContainer struct { id string root string config *configs.Config cgroupManager cgroups.Manager initArgs []string initProcess parentProcess initProcessStartTime string criuPath string m sync.Mutex criuVersion int state containerState created time.Time } // State represents a running container's state type State struct { BaseState // Platform specific fields below here // Path to all the cgroups setup for a container. Key is cgroup subsystem name // with the value as the path. CgroupPaths map[string]string `json:"cgroup_paths"` // NamespacePaths are filepaths to the container's namespaces. Key is the namespace type // with the value as the path. 
NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"` // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore ExternalDescriptors []string `json:"external_descriptors,omitempty"` } // Container is a libcontainer container object. // // Each container is thread-safe within the same process. Since a container can // be destroyed by a separate process, any function may return that the container // was not found. type Container interface { BaseContainer // Methods below here are platform specific // Checkpoint checkpoints the running container's state to disk using the criu(8) utility. // // errors: // Systemerror - System error. Checkpoint(criuOpts *CriuOpts) error // Restore restores the checkpointed container to a running state using the criu(8) utility. // // errors: // Systemerror - System error. Restore(process *Process, criuOpts *CriuOpts) error // If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses // the execution of any user processes. Asynchronously, when the container finished being paused the // state is changed to PAUSED. // If the Container state is PAUSED, do nothing. // // errors: // ContainerNotExists - Container no longer exists, // ContainerNotRunning - Container not running or created, // Systemerror - System error. Pause() error // If the Container state is PAUSED, resumes the execution of any user processes in the // Container before setting the Container state to RUNNING. // If the Container state is RUNNING, do nothing. // // errors: // ContainerNotExists - Container no longer exists, // ContainerNotPaused - Container is not paused, // Systemerror - System error. Resume() error // NotifyOOM returns a read-only channel signaling when the container receives an OOM notification. // // errors: // Systemerror - System error. 
NotifyOOM() (<-chan struct{}, error) // NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level // // errors: // Systemerror - System error. NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) } // ID returns the container's unique ID func (c *linuxContainer) ID() string { return c.id } // Config returns the container's configuration func (c *linuxContainer) Config() configs.Config { return *c.config } func (c *linuxContainer) Status() (Status, error) { c.m.Lock() defer c.m.Unlock() return c.currentStatus() } func (c *linuxContainer) State() (*State, error) { c.m.Lock() defer c.m.Unlock() return c.currentState() } func (c *linuxContainer) Processes() ([]int, error) { pids, err := c.cgroupManager.GetAllPids() if err != nil { return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups") } return pids, nil } func (c *linuxContainer) Stats() (*Stats, error) { var ( err error stats = &Stats{} ) if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newSystemErrorWithCause(err, "getting container stats from cgroups") } for _, iface := range c.config.Networks { switch iface.Type { case "veth": istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) if err != nil { return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName) } stats.Interfaces = append(stats.Interfaces, istats) } } return stats, nil } func (c *linuxContainer) Set(config configs.Config) error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() if err != nil { return err } if status == Stopped { return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning) } c.config = &config return c.cgroupManager.Set(c.config) } func (c *linuxContainer) Start(process *Process) error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() if err != nil { return err } return c.start(process, status == Stopped) 
} func (c *linuxContainer) Run(process *Process) error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() if err != nil { return err } if err := c.start(process, status == Stopped); err != nil { return err } if status == Stopped { return c.exec() } return nil } func (c *linuxContainer) Exec() error { c.m.Lock() defer c.m.Unlock() return c.exec() } func (c *linuxContainer) exec() error { path := filepath.Join(c.root, execFifoFilename) f, err := os.OpenFile(path, os.O_RDONLY, 0) if err != nil { return newSystemErrorWithCause(err, "open exec fifo for reading") } defer f.Close() data, err := ioutil.ReadAll(f) if err != nil { return err } if len(data) > 0 { os.Remove(path) return nil } return fmt.Errorf("cannot start an already running container") } func (c *linuxContainer) start(process *Process, isInit bool) error { parent, err := c.newParentProcess(process, isInit) if err != nil { return newSystemErrorWithCause(err, "creating new parent process") } if err := parent.start(); err != nil { // terminate the process to ensure that it properly is reaped. 
if err := parent.terminate(); err != nil { logrus.Warn(err) } return newSystemErrorWithCause(err, "starting container process") } // generate a timestamp indicating when the container was started c.created = time.Now().UTC() c.state = &runningState{ c: c, } if isInit { c.state = &createdState{ c: c, } state, err := c.updateState(parent) if err != nil { return err } c.initProcessStartTime = state.InitProcessStartTime if c.config.Hooks != nil { s := configs.HookState{ Version: c.config.Version, ID: c.id, Pid: parent.pid(), Root: c.config.Rootfs, BundlePath: utils.SearchLabels(c.config.Labels, "bundle"), } for i, hook := range c.config.Hooks.Poststart { if err := hook.Run(s); err != nil { if err := parent.terminate(); err != nil { logrus.Warn(err) } return newSystemErrorWithCausef(err, "running poststart hook %d", i) } } } } return nil } func (c *linuxContainer) Signal(s os.Signal, all bool) error { if all { return signalAllProcesses(c.cgroupManager, s) } if err := c.initProcess.signal(s); err != nil { return newSystemErrorWithCause(err, "signaling init process") } return nil } func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) { parentPipe, childPipe, err := newPipe() if err != nil { return nil, newSystemErrorWithCause(err, "creating new init pipe") } rootDir, err := os.Open(c.root) if err != nil { return nil, err } cmd, err := c.commandTemplate(p, childPipe, rootDir) if err != nil { return nil, newSystemErrorWithCause(err, "creating new command template") } if !doInit { return c.newSetnsProcess(p, cmd, parentPipe, childPipe, rootDir) } return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir) } func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File) (*exec.Cmd, error) { cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) 
cmd.Stdin = p.Stdin cmd.Stdout = p.Stdout cmd.Stderr = p.Stderr cmd.Dir = c.config.Rootfs if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} } cmd.ExtraFiles = append(p.ExtraFiles, childPipe, rootDir) cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-2), fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1)) // NOTE: when running a container with no PID namespace and the parent process spawning the container is // PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason // even with the parent still running. if c.config.ParentDeathSignal > 0 { cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal) } return cmd, nil } func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) { cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard)) nsMaps := make(map[configs.NamespaceType]string) for _, ns := range c.config.Namespaces { if ns.Path != "" { nsMaps[ns.Type] = ns.Path } } _, sharePidns := nsMaps[configs.NEWPID] data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, "") if err != nil { return nil, err } return &initProcess{ cmd: cmd, childPipe: childPipe, parentPipe: parentPipe, manager: c.cgroupManager, config: c.newInitConfig(p), container: c, process: p, bootstrapData: data, sharePidns: sharePidns, rootDir: rootDir, }, nil } func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*setnsProcess, error) { cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns)) state, err := c.currentState() if err != nil { return nil, newSystemErrorWithCause(err, "getting container's current state") } // for setns process, we dont have to set cloneflags as the process namespaces // will only be set via setns syscall data, err := c.bootstrapData(0, 
state.NamespacePaths, p.consolePath) if err != nil { return nil, err } // TODO: set on container for process management return &setnsProcess{ cmd: cmd, cgroupPaths: c.cgroupManager.GetPaths(), childPipe: childPipe, parentPipe: parentPipe, config: c.newInitConfig(p), process: p, bootstrapData: data, rootDir: rootDir, }, nil } func (c *linuxContainer) newInitConfig(process *Process) *initConfig { cfg := &initConfig{ Config: c.config, Args: process.Args, Env: process.Env, User: process.User, AdditionalGroups: process.AdditionalGroups, Cwd: process.Cwd, Console: process.consolePath, Capabilities: process.Capabilities, PassedFilesCount: len(process.ExtraFiles), ContainerId: c.ID(), NoNewPrivileges: c.config.NoNewPrivileges, AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, ExecFifoPath: filepath.Join(c.root, execFifoFilename), } if process.NoNewPrivileges != nil { cfg.NoNewPrivileges = *process.NoNewPrivileges } if process.AppArmorProfile != "" { cfg.AppArmorProfile = process.AppArmorProfile } if process.Label != "" { cfg.ProcessLabel = process.Label } if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } return cfg } func newPipe() (parent *os.File, child *os.File, err error) { fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) if err != nil { return nil, nil, err } return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil } func (c *linuxContainer) Destroy() error { c.m.Lock() defer c.m.Unlock() return c.state.destroy() } func (c *linuxContainer) Pause() error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() if err != nil { return err } switch status { case Running, Created: if err := c.cgroupManager.Freeze(configs.Frozen); err != nil { return err } return c.state.transition(&pausedState{ c: c, }) } return newGenericError(fmt.Errorf("container not running or created: %s", status), ContainerNotRunning) } func (c 
*linuxContainer) Resume() error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() if err != nil { return err } if status != Paused { return newGenericError(fmt.Errorf("container not paused"), ContainerNotPaused) } if err := c.cgroupManager.Freeze(configs.Thawed); err != nil { return err } return c.state.transition(&runningState{ c: c, }) } func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { return notifyOnOOM(c.cgroupManager.GetPaths()) } func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { return notifyMemoryPressure(c.cgroupManager.GetPaths(), level) } // checkCriuVersion checks Criu version greater than or equal to minVersion func (c *linuxContainer) checkCriuVersion(minVersion string) error { var x, y, z, versionReq int _, err := fmt.Sscanf(minVersion, "%d.%d.%d\n", &x, &y, &z) // 1.5.2 if err != nil { _, err = fmt.Sscanf(minVersion, "Version: %d.%d\n", &x, &y) // 1.6 } versionReq = x*10000 + y*100 + z out, err := exec.Command(c.criuPath, "-V").Output() if err != nil { return fmt.Errorf("Unable to execute CRIU command: %s", c.criuPath) } x = 0 y = 0 z = 0 if ep := strings.Index(string(out), "-"); ep >= 0 { // criu Git version format var version string if sp := strings.Index(string(out), "GitID"); sp > 0 { version = string(out)[sp:ep] } else { return fmt.Errorf("Unable to parse the CRIU version: %s", c.criuPath) } n, err := fmt.Sscanf(string(version), "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2 if err != nil { n, err = fmt.Sscanf(string(version), "GitID: v%d.%d", &x, &y) // 1.6 y++ } else { z++ } if n < 2 || err != nil { return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err) } } else { // criu release version format n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2 if err != nil { n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6 } if n < 2 || err != nil { return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, 
err) } } c.criuVersion = x*10000 + y*100 + z if c.criuVersion < versionReq { return fmt.Errorf("CRIU version must be %s or higher", minVersion) } return nil } const descriptorsFilename = "descriptors.json" func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) { mountDest := m.Destination if strings.HasPrefix(mountDest, c.config.Rootfs) { mountDest = mountDest[len(c.config.Rootfs):] } extMnt := &criurpc.ExtMountMap{ Key: proto.String(mountDest), Val: proto.String(mountDest), } req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) } func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() if err := c.checkCriuVersion("1.5.2"); err != nil { return err } if criuOpts.ImagesDirectory == "" { return fmt.Errorf("invalid directory to save checkpoint") } // Since a container can be C/R'ed multiple times, // the checkpoint directory may already exist. if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) { return err } if criuOpts.WorkDirectory == "" { criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") } if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) { return err } workDir, err := os.Open(criuOpts.WorkDirectory) if err != nil { return err } defer workDir.Close() imageDir, err := os.Open(criuOpts.ImagesDirectory) if err != nil { return err } defer imageDir.Close() rpcOpts := criurpc.CriuOpts{ ImagesDirFd: proto.Int32(int32(imageDir.Fd())), WorkDirFd: proto.Int32(int32(workDir.Fd())), LogLevel: proto.Int32(4), LogFile: proto.String("dump.log"), Root: proto.String(c.config.Rootfs), ManageCgroups: proto.Bool(true), NotifyScripts: proto.Bool(true), Pid: proto.Int32(int32(c.initProcess.pid())), ShellJob: proto.Bool(criuOpts.ShellJob), LeaveRunning: proto.Bool(criuOpts.LeaveRunning), TcpEstablished: proto.Bool(criuOpts.TcpEstablished), ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), FileLocks: proto.Bool(criuOpts.FileLocks), EmptyNs: 
proto.Uint32(criuOpts.EmptyNs), } // append optional criu opts, e.g., page-server and port if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 { rpcOpts.Ps = &criurpc.CriuPageServerInfo{ Address: proto.String(criuOpts.PageServer.Address), Port: proto.Int32(criuOpts.PageServer.Port), } } // append optional manage cgroups mode if criuOpts.ManageCgroupsMode != 0 { if err := c.checkCriuVersion("1.7"); err != nil { return err } mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode) rpcOpts.ManageCgroupsMode = &mode } t := criurpc.CriuReqType_DUMP req := &criurpc.CriuReq{ Type: &t, Opts: &rpcOpts, } for _, m := range c.config.Mounts { switch m.Device { case "bind": c.addCriuDumpMount(req, m) break case "cgroup": binds, err := getCgroupMounts(m) if err != nil { return err } for _, b := range binds { c.addCriuDumpMount(req, b) } break } } // Write the FD info to a file in the image directory fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors()) if err != nil { return err } err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655) if err != nil { return err } err = c.criuSwrk(nil, req, criuOpts, false) if err != nil { return err } return nil } func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) { mountDest := m.Destination if strings.HasPrefix(mountDest, c.config.Rootfs) { mountDest = mountDest[len(c.config.Rootfs):] } extMnt := &criurpc.ExtMountMap{ Key: proto.String(mountDest), Val: proto.String(m.Source), } req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) } func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) { for _, iface := range c.config.Networks { switch iface.Type { case "veth": veth := new(criurpc.CriuVethPair) veth.IfOut = proto.String(iface.HostInterfaceName) veth.IfIn = proto.String(iface.Name) req.Opts.Veths = append(req.Opts.Veths, veth) break case "loopback": break } } for _, i := range criuOpts.VethPairs { veth := 
new(criurpc.CriuVethPair) veth.IfOut = proto.String(i.HostInterfaceName) veth.IfIn = proto.String(i.ContainerInterfaceName) req.Opts.Veths = append(req.Opts.Veths, veth) } } func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() if err := c.checkCriuVersion("1.5.2"); err != nil { return err } if criuOpts.WorkDirectory == "" { criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") } // Since a container can be C/R'ed multiple times, // the work directory may already exist. if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) { return err } workDir, err := os.Open(criuOpts.WorkDirectory) if err != nil { return err } defer workDir.Close() if criuOpts.ImagesDirectory == "" { return fmt.Errorf("invalid directory to restore checkpoint") } imageDir, err := os.Open(criuOpts.ImagesDirectory) if err != nil { return err } defer imageDir.Close() // CRIU has a few requirements for a root directory: // * it must be a mount point // * its parent must not be overmounted // c.config.Rootfs is bind-mounted to a temporary directory // to satisfy these requirements. 
root := filepath.Join(c.root, "criu-root") if err := os.Mkdir(root, 0755); err != nil { return err } defer os.Remove(root) root, err = filepath.EvalSymlinks(root) if err != nil { return err } err = syscall.Mount(c.config.Rootfs, root, "", syscall.MS_BIND|syscall.MS_REC, "") if err != nil { return err } defer syscall.Unmount(root, syscall.MNT_DETACH) t := criurpc.CriuReqType_RESTORE req := &criurpc.CriuReq{ Type: &t, Opts: &criurpc.CriuOpts{ ImagesDirFd: proto.Int32(int32(imageDir.Fd())), WorkDirFd: proto.Int32(int32(workDir.Fd())), EvasiveDevices: proto.Bool(true), LogLevel: proto.Int32(4), LogFile: proto.String("restore.log"), RstSibling: proto.Bool(true), Root: proto.String(root), ManageCgroups: proto.Bool(true), NotifyScripts: proto.Bool(true), ShellJob: proto.Bool(criuOpts.ShellJob), ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), TcpEstablished: proto.Bool(criuOpts.TcpEstablished), FileLocks: proto.Bool(criuOpts.FileLocks), EmptyNs: proto.Uint32(criuOpts.EmptyNs), }, } for _, m := range c.config.Mounts { switch m.Device { case "bind": c.addCriuRestoreMount(req, m) break case "cgroup": binds, err := getCgroupMounts(m) if err != nil { return err } for _, b := range binds { c.addCriuRestoreMount(req, b) } break } } if criuOpts.EmptyNs&syscall.CLONE_NEWNET == 0 { c.restoreNetwork(req, criuOpts) } // append optional manage cgroups mode if criuOpts.ManageCgroupsMode != 0 { if err := c.checkCriuVersion("1.7"); err != nil { return err } mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode) req.Opts.ManageCgroupsMode = &mode } var ( fds []string fdJSON []byte ) if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil { return err } if err := json.Unmarshal(fdJSON, &fds); err != nil { return err } for i := range fds { if s := fds[i]; strings.Contains(s, "pipe:") { inheritFd := new(criurpc.InheritFd) inheritFd.Key = proto.String(s) inheritFd.Fd = proto.Int32(int32(i)) req.Opts.InheritFd = 
append(req.Opts.InheritFd, inheritFd) } } return c.criuSwrk(process, req, criuOpts, true) } func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { if err := c.cgroupManager.Apply(pid); err != nil { return err } path := fmt.Sprintf("/proc/%d/cgroup", pid) cgroupsPaths, err := cgroups.ParseCgroupFile(path) if err != nil { return err } for c, p := range cgroupsPaths { cgroupRoot := &criurpc.CgroupRoot{ Ctrl: proto.String(c), Path: proto.String(p), } req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot) } return nil } func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error { fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0) if err != nil { return err } logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile()) criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") defer criuClient.Close() defer criuServer.Close() args := []string{"swrk", "3"} logrus.Debugf("Using CRIU %d at: %s", c.criuVersion, c.criuPath) logrus.Debugf("Using CRIU with following args: %s", args) cmd := exec.Command(c.criuPath, args...) 
if process != nil { cmd.Stdin = process.Stdin cmd.Stdout = process.Stdout cmd.Stderr = process.Stderr } cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer) if err := cmd.Start(); err != nil { return err } criuServer.Close() defer func() { criuClient.Close() _, err := cmd.Process.Wait() if err != nil { return } }() if applyCgroups { err := c.criuApplyCgroups(cmd.Process.Pid, req) if err != nil { return err } } var extFds []string if process != nil { extFds, err = getPipeFds(cmd.Process.Pid) if err != nil { return err } } logrus.Debugf("Using CRIU in %s mode", req.GetType().String()) val := reflect.ValueOf(req.GetOpts()) v := reflect.Indirect(val) for i := 0; i < v.NumField(); i++ { st := v.Type() name := st.Field(i).Name if strings.HasPrefix(name, "XXX_") { continue } value := val.MethodByName("Get" + name).Call([]reflect.Value{}) logrus.Debugf("CRIU option %s with value %v", name, value[0]) } data, err := proto.Marshal(req) if err != nil { return err } _, err = criuClient.Write(data) if err != nil { return err } buf := make([]byte, 10*4096) for true { n, err := criuClient.Read(buf) if err != nil { return err } if n == 0 { return fmt.Errorf("unexpected EOF") } if n == len(buf) { return fmt.Errorf("buffer is too small") } resp := new(criurpc.CriuResp) err = proto.Unmarshal(buf[:n], resp) if err != nil { return err } if !resp.GetSuccess() { typeString := req.GetType().String() return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath) } t := resp.GetType() switch { case t == criurpc.CriuReqType_NOTIFY: if err := c.criuNotifications(resp, process, opts, extFds); err != nil { return err } t = criurpc.CriuReqType_NOTIFY req = &criurpc.CriuReq{ Type: &t, NotifySuccess: proto.Bool(true), } data, err = proto.Marshal(req) if err != nil { return err } _, err = criuClient.Write(data) if err != nil { return err } continue case t == criurpc.CriuReqType_RESTORE: case t == criurpc.CriuReqType_DUMP: break default: return 
fmt.Errorf("unable to parse the response %s", resp.String()) } break } // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. // Here we want to wait only the CRIU process. st, err := cmd.Process.Wait() if err != nil { return err } if !st.Success() { return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath) } return nil } // block any external network activity func lockNetwork(config *configs.Config) error { for _, config := range config.Networks { strategy, err := getStrategy(config.Type) if err != nil { return err } if err := strategy.detach(config); err != nil { return err } } return nil } func unlockNetwork(config *configs.Config) error { for _, config := range config.Networks { strategy, err := getStrategy(config.Type) if err != nil { return err } if err = strategy.attach(config); err != nil { return err } } return nil } func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error { notify := resp.GetNotify() if notify == nil { return fmt.Errorf("invalid response: %s", resp.String()) } switch { case notify.GetScript() == "post-dump": f, err := os.Create(filepath.Join(c.root, "checkpoint")) if err != nil { return err } f.Close() case notify.GetScript() == "network-unlock": if err := unlockNetwork(c.config); err != nil { return err } case notify.GetScript() == "network-lock": if err := lockNetwork(c.config); err != nil { return err } case notify.GetScript() == "setup-namespaces": if c.config.Hooks != nil { s := configs.HookState{ Version: c.config.Version, ID: c.id, Pid: int(notify.GetPid()), Root: c.config.Rootfs, } for i, hook := range c.config.Hooks.Prestart { if err := hook.Run(s); err != nil { return newSystemErrorWithCausef(err, "running prestart hook %d", i) } } } case notify.GetScript() == "post-restore": pid := notify.GetPid() r, err := newRestoredProcess(int(pid), fds) if err != nil { return err } process.ops = r if err := 
c.state.transition(&restoredState{ imageDir: opts.ImagesDirectory, c: c, }); err != nil { return err } // create a timestamp indicating when the restored checkpoint was started c.created = time.Now().UTC() if _, err := c.updateState(r); err != nil { return err } if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil { if !os.IsNotExist(err) { logrus.Error(err) } } } return nil } func (c *linuxContainer) updateState(process parentProcess) (*State, error) { c.initProcess = process state, err := c.currentState() if err != nil { return nil, err } err = c.saveState(state) if err != nil { return nil, err } return state, nil } func (c *linuxContainer) saveState(s *State) error { f, err := os.Create(filepath.Join(c.root, stateFilename)) if err != nil { return err } defer f.Close() return utils.WriteJSON(f, s) } func (c *linuxContainer) deleteState() error { return os.Remove(filepath.Join(c.root, stateFilename)) } func (c *linuxContainer) currentStatus() (Status, error) { if err := c.refreshState(); err != nil { return -1, err } return c.state.status(), nil } // refreshState needs to be called to verify that the current state on the // container is what is true. Because consumers of libcontainer can use it // out of process we need to verify the container's status based on runtime // information and not rely on our in process info. 
func (c *linuxContainer) refreshState() error { paused, err := c.isPaused() if err != nil { return err } if paused { return c.state.transition(&pausedState{c: c}) } t, err := c.runType() if err != nil { return err } switch t { case Created: return c.state.transition(&createdState{c: c}) case Running: return c.state.transition(&runningState{c: c}) } return c.state.transition(&stoppedState{c: c}) } // doesInitProcessExist checks if the init process is still the same process // as the initial one, it could happen that the original process has exited // and a new process has been created with the same pid, in this case, the // container would already be stopped. func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) { startTime, err := system.GetProcessStartTime(initPid) if err != nil { return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid) } if c.initProcessStartTime != startTime { return false, nil } return true, nil } func (c *linuxContainer) runType() (Status, error) { if c.initProcess == nil { return Stopped, nil } pid := c.initProcess.pid() // return Running if the init process is alive if err := syscall.Kill(pid, 0); err != nil { if err == syscall.ESRCH { // It means the process does not exist anymore, could happen when the // process exited just when we call the function, we should not return // error in this case. return Stopped, nil } return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid) } // check if the process is still the original init process. exist, err := c.doesInitProcessExist(pid) if !exist || err != nil { return Stopped, err } // check if the process that is running is the init process or the user's process. // this is the difference between the container Running and Created. 
environ, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/environ", pid)) if err != nil { return Stopped, newSystemErrorWithCausef(err, "reading /proc/%d/environ", pid) } check := []byte("_LIBCONTAINER") if bytes.Contains(environ, check) { return Created, nil } return Running, nil } func (c *linuxContainer) isPaused() (bool, error) { data, err := ioutil.ReadFile(filepath.Join(c.cgroupManager.GetPaths()["freezer"], "freezer.state")) if err != nil { // If freezer cgroup is not mounted, the container would just be not paused. if os.IsNotExist(err) { return false, nil } return false, newSystemErrorWithCause(err, "checking if container is paused") } return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil } func (c *linuxContainer) currentState() (*State, error) { var ( startTime string externalDescriptors []string pid = -1 ) if c.initProcess != nil { pid = c.initProcess.pid() startTime, _ = c.initProcess.startTime() externalDescriptors = c.initProcess.externalDescriptors() } state := &State{ BaseState: BaseState{ ID: c.ID(), Config: *c.config, InitProcessPid: pid, InitProcessStartTime: startTime, Created: c.created, }, CgroupPaths: c.cgroupManager.GetPaths(), NamespacePaths: make(map[configs.NamespaceType]string), ExternalDescriptors: externalDescriptors, } if pid > 0 { for _, ns := range c.config.Namespaces { state.NamespacePaths[ns.Type] = ns.GetPath(pid) } for _, nsType := range configs.NamespaceTypes() { if !configs.IsNamespaceSupported(nsType) { continue } if _, ok := state.NamespacePaths[nsType]; !ok { ns := configs.Namespace{Type: nsType} state.NamespacePaths[ns.Type] = ns.GetPath(pid) } } } return state, nil } // orderNamespacePaths sorts namespace paths into a list of paths that we // can setns in order. func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) { paths := []string{} order := []configs.NamespaceType{ // The user namespace *must* be done first. 
configs.NEWUSER, configs.NEWIPC, configs.NEWUTS, configs.NEWNET, configs.NEWPID, configs.NEWNS, } // Remove namespaces that we don't need to join. var nsTypes []configs.NamespaceType for _, ns := range order { if c.config.Namespaces.Contains(ns) { nsTypes = append(nsTypes, ns) } } for _, nsType := range nsTypes { if p, ok := namespaces[nsType]; ok && p != "" { // check if the requested namespace is supported if !configs.IsNamespaceSupported(nsType) { return nil, newSystemError(fmt.Errorf("namespace %s is not supported", nsType)) } // only set to join this namespace if it exists if _, err := os.Lstat(p); err != nil { return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p) } // do not allow namespace path with comma as we use it to separate // the namespace paths if strings.ContainsRune(p, ',') { return nil, newSystemError(fmt.Errorf("invalid path %s", p)) } paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(nsType), p)) } } return paths, nil } func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) { data := bytes.NewBuffer(nil) for _, im := range idMap { line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size) if _, err := data.WriteString(line); err != nil { return nil, err } } return data.Bytes(), nil } // bootstrapData encodes the necessary data in netlink binary format // as a io.Reader. // Consumer can write the data to a bootstrap program // such as one that uses nsenter package to bootstrap the container's // init process correctly, i.e. with correct namespaces, uid/gid // mapping etc. 
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, consolePath string) (io.Reader, error) { // create the netlink message r := nl.NewNetlinkRequest(int(InitMsg), 0) // write cloneFlags r.AddData(&Int32msg{ Type: CloneFlagsAttr, Value: uint32(cloneFlags), }) // write console path if consolePath != "" { r.AddData(&Bytemsg{ Type: ConsolePathAttr, Value: []byte(consolePath), }) } // write custom namespace paths if len(nsMaps) > 0 { nsPaths, err := c.orderNamespacePaths(nsMaps) if err != nil { return nil, err } r.AddData(&Bytemsg{ Type: NsPathsAttr, Value: []byte(strings.Join(nsPaths, ",")), }) } // write namespace paths only when we are not joining an existing user ns _, joinExistingUser := nsMaps[configs.NEWUSER] if !joinExistingUser { // write uid mappings if len(c.config.UidMappings) > 0 { b, err := encodeIDMapping(c.config.UidMappings) if err != nil { return nil, err } r.AddData(&Bytemsg{ Type: UidmapAttr, Value: b, }) } // write gid mappings if len(c.config.GidMappings) > 0 { b, err := encodeIDMapping(c.config.GidMappings) if err != nil { return nil, err } r.AddData(&Bytemsg{ Type: GidmapAttr, Value: b, }) // check if we have CAP_SETGID to setgroup properly pid, err := capability.NewPid(os.Getpid()) if err != nil { return nil, err } if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) { r.AddData(&Boolmsg{ Type: SetgroupAttr, Value: true, }) } } } return bytes.NewReader(r.Serialize()), nil } docker-runc-tags-docker-1.13.1/libcontainer/container_linux_test.go000066400000000000000000000112521304443252500254550ustar00rootroot00000000000000// +build linux package libcontainer import ( "fmt" "os" "testing" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" ) type mockCgroupManager struct { pids []int allPids []int stats *cgroups.Stats paths map[string]string } func (m *mockCgroupManager) GetPids() ([]int, error) { return m.pids, nil } func (m 
*mockCgroupManager) GetAllPids() ([]int, error) { return m.allPids, nil } func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) { return m.stats, nil } func (m *mockCgroupManager) Apply(pid int) error { return nil } func (m *mockCgroupManager) Set(container *configs.Config) error { return nil } func (m *mockCgroupManager) Destroy() error { return nil } func (m *mockCgroupManager) GetPaths() map[string]string { return m.paths } func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { return nil } type mockProcess struct { _pid int started string } func (m *mockProcess) terminate() error { return nil } func (m *mockProcess) pid() int { return m._pid } func (m *mockProcess) startTime() (string, error) { return m.started, nil } func (m *mockProcess) start() error { return nil } func (m *mockProcess) wait() (*os.ProcessState, error) { return nil, nil } func (m *mockProcess) signal(_ os.Signal) error { return nil } func (m *mockProcess) externalDescriptors() []string { return []string{} } func (m *mockProcess) setExternalDescriptors(newFds []string) { } func TestGetContainerPids(t *testing.T) { container := &linuxContainer{ id: "myid", config: &configs.Config{}, cgroupManager: &mockCgroupManager{allPids: []int{1, 2, 3}}, } pids, err := container.Processes() if err != nil { t.Fatal(err) } for i, expected := range []int{1, 2, 3} { if pids[i] != expected { t.Fatalf("expected pid %d but received %d", expected, pids[i]) } } } func TestGetContainerStats(t *testing.T) { container := &linuxContainer{ id: "myid", config: &configs.Config{}, cgroupManager: &mockCgroupManager{ pids: []int{1, 2, 3}, stats: &cgroups.Stats{ MemoryStats: cgroups.MemoryStats{ Usage: cgroups.MemoryData{ Usage: 1024, }, }, }, }, } stats, err := container.Stats() if err != nil { t.Fatal(err) } if stats.CgroupStats == nil { t.Fatal("cgroup stats are nil") } if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 { t.Fatalf("expected memory usage 1024 but recevied %d", 
stats.CgroupStats.MemoryStats.Usage.Usage) } } func TestGetContainerState(t *testing.T) { var ( pid = os.Getpid() expectedMemoryPath = "/sys/fs/cgroup/memory/myid" expectedNetworkPath = "/networks/fd" ) container := &linuxContainer{ id: "myid", config: &configs.Config{ Namespaces: []configs.Namespace{ {Type: configs.NEWPID}, {Type: configs.NEWNS}, {Type: configs.NEWNET, Path: expectedNetworkPath}, {Type: configs.NEWUTS}, // emulate host for IPC //{Type: configs.NEWIPC}, }, }, initProcess: &mockProcess{ _pid: pid, started: "010", }, cgroupManager: &mockCgroupManager{ pids: []int{1, 2, 3}, stats: &cgroups.Stats{ MemoryStats: cgroups.MemoryStats{ Usage: cgroups.MemoryData{ Usage: 1024, }, }, }, paths: map[string]string{ "memory": expectedMemoryPath, }, }, } container.state = &createdState{c: container} state, err := container.State() if err != nil { t.Fatal(err) } if state.InitProcessPid != pid { t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid) } if state.InitProcessStartTime != "010" { t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime) } paths := state.CgroupPaths if paths == nil { t.Fatal("cgroup paths should not be nil") } if memPath := paths["memory"]; memPath != expectedMemoryPath { t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath) } for _, ns := range container.config.Namespaces { path := state.NamespacePaths[ns.Type] if path == "" { t.Fatalf("expected non nil namespace path for %s", ns.Type) } if ns.Type == configs.NEWNET { if path != expectedNetworkPath { t.Fatalf("expected path %q but received %q", expectedNetworkPath, path) } } else { file := "" switch ns.Type { case configs.NEWNET: file = "net" case configs.NEWNS: file = "mnt" case configs.NEWPID: file = "pid" case configs.NEWIPC: file = "ipc" case configs.NEWUSER: file = "user" case configs.NEWUTS: file = "uts" } expected := fmt.Sprintf("/proc/%d/ns/%s", pid, file) if expected != path { t.Fatalf("expected path %q but 
received %q", expected, path) } } } } docker-runc-tags-docker-1.13.1/libcontainer/container_solaris.go000066400000000000000000000007071304443252500247360ustar00rootroot00000000000000package libcontainer // State represents a running container's state type State struct { BaseState // Platform specific fields below here } // A libcontainer container object. // // Each container is thread-safe within the same process. Since a container can // be destroyed by a separate process, any function may return that the container // was not found. type Container interface { BaseContainer // Methods below here are platform specific } docker-runc-tags-docker-1.13.1/libcontainer/container_windows.go000066400000000000000000000007071304443252500247540ustar00rootroot00000000000000package libcontainer // State represents a running container's state type State struct { BaseState // Platform specific fields below here } // A libcontainer container object. // // Each container is thread-safe within the same process. Since a container can // be destroyed by a separate process, any function may return that the container // was not found. 
type Container interface { BaseContainer // Methods below here are platform specific } docker-runc-tags-docker-1.13.1/libcontainer/criu_opts_unix.go000066400000000000000000000032221304443252500242650ustar00rootroot00000000000000// +build linux freebsd package libcontainer // cgroup restoring strategy provided by criu type cgMode uint32 const ( CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu CRIU_CG_MODE_FULL // always restore all cgroups and their properties CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT ) type CriuPageServerInfo struct { Address string // IP address of CRIU page server Port int32 // port number of CRIU page server } type VethPairName struct { ContainerInterfaceName string HostInterfaceName string } type CriuOpts struct { ImagesDirectory string // directory for storing image files WorkDirectory string // directory to cd and write logs/pidfiles/stats to LeaveRunning bool // leave container in running state after checkpoint TcpEstablished bool // checkpoint/restore established TCP connections ExternalUnixConnections bool // allow external unix connections ShellJob bool // allow to dump and restore shell jobs FileLocks bool // handle file locks, for safety PageServer CriuPageServerInfo // allow to dump to criu page server VethPairs []VethPairName // pass the veth to criu when restore ManageCgroupsMode cgMode // dump or restore cgroup mode EmptyNs uint32 // don't c/r properties for namespace from this mask } docker-runc-tags-docker-1.13.1/libcontainer/criu_opts_windows.go000066400000000000000000000002431304443252500247740ustar00rootroot00000000000000package libcontainer // TODO Windows: This can ultimately be entirely factored out as criu is // a Unix concept not relevant on Windows. 
type CriuOpts struct { } docker-runc-tags-docker-1.13.1/libcontainer/criurpc/000077500000000000000000000000001304443252500223345ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/criurpc/Makefile000066400000000000000000000000641304443252500237740ustar00rootroot00000000000000gen: criurpc.proto protoc --go_out=. criurpc.proto docker-runc-tags-docker-1.13.1/libcontainer/criurpc/criurpc.pb.go000066400000000000000000000522651304443252500247440ustar00rootroot00000000000000// Code generated by protoc-gen-go. // source: criurpc.proto // DO NOT EDIT! /* Package criurpc is a generated protocol buffer package. It is generated from these files: criurpc.proto It has these top-level messages: CriuPageServerInfo CriuVethPair ExtMountMap InheritFd CgroupRoot UnixSk CriuOpts CriuDumpResp CriuRestoreResp CriuNotify CriuFeatures CriuReq CriuResp */ package criurpc import proto "github.com/golang/protobuf/proto" import math "math" // Reference imports to suppress errors if they are not otherwise used. 
var _ = proto.Marshal var _ = math.Inf type CriuCgMode int32 const ( CriuCgMode_IGNORE CriuCgMode = 0 CriuCgMode_NONE CriuCgMode = 1 CriuCgMode_PROPS CriuCgMode = 2 CriuCgMode_SOFT CriuCgMode = 3 CriuCgMode_FULL CriuCgMode = 4 CriuCgMode_STRICT CriuCgMode = 5 CriuCgMode_DEFAULT CriuCgMode = 6 ) var CriuCgMode_name = map[int32]string{ 0: "IGNORE", 1: "NONE", 2: "PROPS", 3: "SOFT", 4: "FULL", 5: "STRICT", 6: "DEFAULT", } var CriuCgMode_value = map[string]int32{ "IGNORE": 0, "NONE": 1, "PROPS": 2, "SOFT": 3, "FULL": 4, "STRICT": 5, "DEFAULT": 6, } func (x CriuCgMode) Enum() *CriuCgMode { p := new(CriuCgMode) *p = x return p } func (x CriuCgMode) String() string { return proto.EnumName(CriuCgMode_name, int32(x)) } func (x *CriuCgMode) UnmarshalJSON(data []byte) error { value, err := proto.UnmarshalJSONEnum(CriuCgMode_value, data, "CriuCgMode") if err != nil { return err } *x = CriuCgMode(value) return nil } type CriuReqType int32 const ( CriuReqType_EMPTY CriuReqType = 0 CriuReqType_DUMP CriuReqType = 1 CriuReqType_RESTORE CriuReqType = 2 CriuReqType_CHECK CriuReqType = 3 CriuReqType_PRE_DUMP CriuReqType = 4 CriuReqType_PAGE_SERVER CriuReqType = 5 CriuReqType_NOTIFY CriuReqType = 6 CriuReqType_CPUINFO_DUMP CriuReqType = 7 CriuReqType_CPUINFO_CHECK CriuReqType = 8 CriuReqType_FEATURE_CHECK CriuReqType = 9 ) var CriuReqType_name = map[int32]string{ 0: "EMPTY", 1: "DUMP", 2: "RESTORE", 3: "CHECK", 4: "PRE_DUMP", 5: "PAGE_SERVER", 6: "NOTIFY", 7: "CPUINFO_DUMP", 8: "CPUINFO_CHECK", 9: "FEATURE_CHECK", } var CriuReqType_value = map[string]int32{ "EMPTY": 0, "DUMP": 1, "RESTORE": 2, "CHECK": 3, "PRE_DUMP": 4, "PAGE_SERVER": 5, "NOTIFY": 6, "CPUINFO_DUMP": 7, "CPUINFO_CHECK": 8, "FEATURE_CHECK": 9, } func (x CriuReqType) Enum() *CriuReqType { p := new(CriuReqType) *p = x return p } func (x CriuReqType) String() string { return proto.EnumName(CriuReqType_name, int32(x)) } func (x *CriuReqType) UnmarshalJSON(data []byte) error { value, err := 
proto.UnmarshalJSONEnum(CriuReqType_value, data, "CriuReqType") if err != nil { return err } *x = CriuReqType(value) return nil } type CriuPageServerInfo struct { Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"` Port *int32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` Pid *int32 `protobuf:"varint,3,opt,name=pid" json:"pid,omitempty"` Fd *int32 `protobuf:"varint,4,opt,name=fd" json:"fd,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} } func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) } func (*CriuPageServerInfo) ProtoMessage() {} func (m *CriuPageServerInfo) GetAddress() string { if m != nil && m.Address != nil { return *m.Address } return "" } func (m *CriuPageServerInfo) GetPort() int32 { if m != nil && m.Port != nil { return *m.Port } return 0 } func (m *CriuPageServerInfo) GetPid() int32 { if m != nil && m.Pid != nil { return *m.Pid } return 0 } func (m *CriuPageServerInfo) GetFd() int32 { if m != nil && m.Fd != nil { return *m.Fd } return 0 } type CriuVethPair struct { IfIn *string `protobuf:"bytes,1,req,name=if_in" json:"if_in,omitempty"` IfOut *string `protobuf:"bytes,2,req,name=if_out" json:"if_out,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuVethPair) Reset() { *m = CriuVethPair{} } func (m *CriuVethPair) String() string { return proto.CompactTextString(m) } func (*CriuVethPair) ProtoMessage() {} func (m *CriuVethPair) GetIfIn() string { if m != nil && m.IfIn != nil { return *m.IfIn } return "" } func (m *CriuVethPair) GetIfOut() string { if m != nil && m.IfOut != nil { return *m.IfOut } return "" } type ExtMountMap struct { Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` Val *string `protobuf:"bytes,2,req,name=val" json:"val,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *ExtMountMap) Reset() { *m = ExtMountMap{} } func (m *ExtMountMap) String() string { return 
proto.CompactTextString(m) } func (*ExtMountMap) ProtoMessage() {} func (m *ExtMountMap) GetKey() string { if m != nil && m.Key != nil { return *m.Key } return "" } func (m *ExtMountMap) GetVal() string { if m != nil && m.Val != nil { return *m.Val } return "" } type InheritFd struct { Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *InheritFd) Reset() { *m = InheritFd{} } func (m *InheritFd) String() string { return proto.CompactTextString(m) } func (*InheritFd) ProtoMessage() {} func (m *InheritFd) GetKey() string { if m != nil && m.Key != nil { return *m.Key } return "" } func (m *InheritFd) GetFd() int32 { if m != nil && m.Fd != nil { return *m.Fd } return 0 } type CgroupRoot struct { Ctrl *string `protobuf:"bytes,1,opt,name=ctrl" json:"ctrl,omitempty"` Path *string `protobuf:"bytes,2,req,name=path" json:"path,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CgroupRoot) Reset() { *m = CgroupRoot{} } func (m *CgroupRoot) String() string { return proto.CompactTextString(m) } func (*CgroupRoot) ProtoMessage() {} func (m *CgroupRoot) GetCtrl() string { if m != nil && m.Ctrl != nil { return *m.Ctrl } return "" } func (m *CgroupRoot) GetPath() string { if m != nil && m.Path != nil { return *m.Path } return "" } type UnixSk struct { Inode *uint32 `protobuf:"varint,1,req,name=inode" json:"inode,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *UnixSk) Reset() { *m = UnixSk{} } func (m *UnixSk) String() string { return proto.CompactTextString(m) } func (*UnixSk) ProtoMessage() {} func (m *UnixSk) GetInode() uint32 { if m != nil && m.Inode != nil { return *m.Inode } return 0 } type CriuOpts struct { ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd" json:"images_dir_fd,omitempty"` Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running" 
json:"leave_running,omitempty"` ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk" json:"ext_unix_sk,omitempty"` TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established" json:"tcp_established,omitempty"` EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices" json:"evasive_devices,omitempty"` ShellJob *bool `protobuf:"varint,7,opt,name=shell_job" json:"shell_job,omitempty"` FileLocks *bool `protobuf:"varint,8,opt,name=file_locks" json:"file_locks,omitempty"` LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,def=2" json:"log_level,omitempty"` LogFile *string `protobuf:"bytes,10,opt,name=log_file" json:"log_file,omitempty"` Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"` NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts" json:"notify_scripts,omitempty"` Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"` ParentImg *string `protobuf:"bytes,14,opt,name=parent_img" json:"parent_img,omitempty"` TrackMem *bool `protobuf:"varint,15,opt,name=track_mem" json:"track_mem,omitempty"` AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup" json:"auto_dedup,omitempty"` WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd" json:"work_dir_fd,omitempty"` LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap" json:"link_remap,omitempty"` Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"` CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,def=4294967295" json:"cpu_cap,omitempty"` ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap" json:"force_irmap,omitempty"` ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd" json:"exec_cmd,omitempty"` ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt" json:"ext_mnt,omitempty"` ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups" json:"manage_cgroups,omitempty"` CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root" json:"cg_root,omitempty"` RstSibling *bool 
`protobuf:"varint,26,opt,name=rst_sibling" json:"rst_sibling,omitempty"` InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd" json:"inherit_fd,omitempty"` AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt" json:"auto_ext_mnt,omitempty"` ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing" json:"ext_sharing,omitempty"` ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters" json:"ext_masters,omitempty"` SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt" json:"skip_mnt,omitempty"` EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs" json:"enable_fs,omitempty"` UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino" json:"unix_sk_ino,omitempty"` ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"` GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,def=1048576" json:"ghost_limit,omitempty"` IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths" json:"irmap_scan_paths,omitempty"` External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"` EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns" json:"empty_ns,omitempty"` NoSeccomp *bool `protobuf:"varint,39,opt,name=no_seccomp" json:"no_seccomp,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuOpts) Reset() { *m = CriuOpts{} } func (m *CriuOpts) String() string { return proto.CompactTextString(m) } func (*CriuOpts) ProtoMessage() {} const Default_CriuOpts_LogLevel int32 = 2 const Default_CriuOpts_CpuCap uint32 = 4294967295 const Default_CriuOpts_GhostLimit uint32 = 1048576 func (m *CriuOpts) GetImagesDirFd() int32 { if m != nil && m.ImagesDirFd != nil { return *m.ImagesDirFd } return 0 } func (m *CriuOpts) GetPid() int32 { if m != nil && m.Pid != nil { return *m.Pid } return 0 } func (m *CriuOpts) GetLeaveRunning() bool { if m != nil && m.LeaveRunning != nil { return *m.LeaveRunning } return false } func (m *CriuOpts) GetExtUnixSk() 
bool { if m != nil && m.ExtUnixSk != nil { return *m.ExtUnixSk } return false } func (m *CriuOpts) GetTcpEstablished() bool { if m != nil && m.TcpEstablished != nil { return *m.TcpEstablished } return false } func (m *CriuOpts) GetEvasiveDevices() bool { if m != nil && m.EvasiveDevices != nil { return *m.EvasiveDevices } return false } func (m *CriuOpts) GetShellJob() bool { if m != nil && m.ShellJob != nil { return *m.ShellJob } return false } func (m *CriuOpts) GetFileLocks() bool { if m != nil && m.FileLocks != nil { return *m.FileLocks } return false } func (m *CriuOpts) GetLogLevel() int32 { if m != nil && m.LogLevel != nil { return *m.LogLevel } return Default_CriuOpts_LogLevel } func (m *CriuOpts) GetLogFile() string { if m != nil && m.LogFile != nil { return *m.LogFile } return "" } func (m *CriuOpts) GetPs() *CriuPageServerInfo { if m != nil { return m.Ps } return nil } func (m *CriuOpts) GetNotifyScripts() bool { if m != nil && m.NotifyScripts != nil { return *m.NotifyScripts } return false } func (m *CriuOpts) GetRoot() string { if m != nil && m.Root != nil { return *m.Root } return "" } func (m *CriuOpts) GetParentImg() string { if m != nil && m.ParentImg != nil { return *m.ParentImg } return "" } func (m *CriuOpts) GetTrackMem() bool { if m != nil && m.TrackMem != nil { return *m.TrackMem } return false } func (m *CriuOpts) GetAutoDedup() bool { if m != nil && m.AutoDedup != nil { return *m.AutoDedup } return false } func (m *CriuOpts) GetWorkDirFd() int32 { if m != nil && m.WorkDirFd != nil { return *m.WorkDirFd } return 0 } func (m *CriuOpts) GetLinkRemap() bool { if m != nil && m.LinkRemap != nil { return *m.LinkRemap } return false } func (m *CriuOpts) GetVeths() []*CriuVethPair { if m != nil { return m.Veths } return nil } func (m *CriuOpts) GetCpuCap() uint32 { if m != nil && m.CpuCap != nil { return *m.CpuCap } return Default_CriuOpts_CpuCap } func (m *CriuOpts) GetForceIrmap() bool { if m != nil && m.ForceIrmap != nil { return *m.ForceIrmap } 
return false } func (m *CriuOpts) GetExecCmd() []string { if m != nil { return m.ExecCmd } return nil } func (m *CriuOpts) GetExtMnt() []*ExtMountMap { if m != nil { return m.ExtMnt } return nil } func (m *CriuOpts) GetManageCgroups() bool { if m != nil && m.ManageCgroups != nil { return *m.ManageCgroups } return false } func (m *CriuOpts) GetCgRoot() []*CgroupRoot { if m != nil { return m.CgRoot } return nil } func (m *CriuOpts) GetRstSibling() bool { if m != nil && m.RstSibling != nil { return *m.RstSibling } return false } func (m *CriuOpts) GetInheritFd() []*InheritFd { if m != nil { return m.InheritFd } return nil } func (m *CriuOpts) GetAutoExtMnt() bool { if m != nil && m.AutoExtMnt != nil { return *m.AutoExtMnt } return false } func (m *CriuOpts) GetExtSharing() bool { if m != nil && m.ExtSharing != nil { return *m.ExtSharing } return false } func (m *CriuOpts) GetExtMasters() bool { if m != nil && m.ExtMasters != nil { return *m.ExtMasters } return false } func (m *CriuOpts) GetSkipMnt() []string { if m != nil { return m.SkipMnt } return nil } func (m *CriuOpts) GetEnableFs() []string { if m != nil { return m.EnableFs } return nil } func (m *CriuOpts) GetUnixSkIno() []*UnixSk { if m != nil { return m.UnixSkIno } return nil } func (m *CriuOpts) GetManageCgroupsMode() CriuCgMode { if m != nil && m.ManageCgroupsMode != nil { return *m.ManageCgroupsMode } return CriuCgMode_IGNORE } func (m *CriuOpts) GetGhostLimit() uint32 { if m != nil && m.GhostLimit != nil { return *m.GhostLimit } return Default_CriuOpts_GhostLimit } func (m *CriuOpts) GetIrmapScanPaths() []string { if m != nil { return m.IrmapScanPaths } return nil } func (m *CriuOpts) GetExternal() []string { if m != nil { return m.External } return nil } func (m *CriuOpts) GetEmptyNs() uint32 { if m != nil && m.EmptyNs != nil { return *m.EmptyNs } return 0 } func (m *CriuOpts) GetNoSeccomp() bool { if m != nil && m.NoSeccomp != nil { return *m.NoSeccomp } return false } type CriuDumpResp struct { 
Restored *bool `protobuf:"varint,1,opt,name=restored" json:"restored,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} } func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) } func (*CriuDumpResp) ProtoMessage() {} func (m *CriuDumpResp) GetRestored() bool { if m != nil && m.Restored != nil { return *m.Restored } return false } type CriuRestoreResp struct { Pid *int32 `protobuf:"varint,1,req,name=pid" json:"pid,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} } func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) } func (*CriuRestoreResp) ProtoMessage() {} func (m *CriuRestoreResp) GetPid() int32 { if m != nil && m.Pid != nil { return *m.Pid } return 0 } type CriuNotify struct { Script *string `protobuf:"bytes,1,opt,name=script" json:"script,omitempty"` Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuNotify) Reset() { *m = CriuNotify{} } func (m *CriuNotify) String() string { return proto.CompactTextString(m) } func (*CriuNotify) ProtoMessage() {} func (m *CriuNotify) GetScript() string { if m != nil && m.Script != nil { return *m.Script } return "" } func (m *CriuNotify) GetPid() int32 { if m != nil && m.Pid != nil { return *m.Pid } return 0 } // // List of features which can queried via // CRIU_REQ_TYPE__FEATURE_CHECK type CriuFeatures struct { MemTrack *bool `protobuf:"varint,1,opt,name=mem_track" json:"mem_track,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuFeatures) Reset() { *m = CriuFeatures{} } func (m *CriuFeatures) String() string { return proto.CompactTextString(m) } func (*CriuFeatures) ProtoMessage() {} func (m *CriuFeatures) GetMemTrack() bool { if m != nil && m.MemTrack != nil { return *m.MemTrack } return false } type CriuReq struct { Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" 
json:"type,omitempty"` Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"` NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success" json:"notify_success,omitempty"` // // When set service won't close the connection but // will wait for more req-s to appear. Works not // for all request types. KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open" json:"keep_open,omitempty"` // // 'features' can be used to query which features // are supported by the installed criu/kernel // via RPC. Features *CriuFeatures `protobuf:"bytes,5,opt,name=features" json:"features,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuReq) Reset() { *m = CriuReq{} } func (m *CriuReq) String() string { return proto.CompactTextString(m) } func (*CriuReq) ProtoMessage() {} func (m *CriuReq) GetType() CriuReqType { if m != nil && m.Type != nil { return *m.Type } return CriuReqType_EMPTY } func (m *CriuReq) GetOpts() *CriuOpts { if m != nil { return m.Opts } return nil } func (m *CriuReq) GetNotifySuccess() bool { if m != nil && m.NotifySuccess != nil { return *m.NotifySuccess } return false } func (m *CriuReq) GetKeepOpen() bool { if m != nil && m.KeepOpen != nil { return *m.KeepOpen } return false } func (m *CriuReq) GetFeatures() *CriuFeatures { if m != nil { return m.Features } return nil } type CriuResp struct { Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"` Success *bool `protobuf:"varint,2,req,name=success" json:"success,omitempty"` Dump *CriuDumpResp `protobuf:"bytes,3,opt,name=dump" json:"dump,omitempty"` Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"` Notify *CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"` Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"` CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno" json:"cr_errno,omitempty"` Features *CriuFeatures `protobuf:"bytes,8,opt,name=features" 
json:"features,omitempty"` XXX_unrecognized []byte `json:"-"` } func (m *CriuResp) Reset() { *m = CriuResp{} } func (m *CriuResp) String() string { return proto.CompactTextString(m) } func (*CriuResp) ProtoMessage() {} func (m *CriuResp) GetType() CriuReqType { if m != nil && m.Type != nil { return *m.Type } return CriuReqType_EMPTY } func (m *CriuResp) GetSuccess() bool { if m != nil && m.Success != nil { return *m.Success } return false } func (m *CriuResp) GetDump() *CriuDumpResp { if m != nil { return m.Dump } return nil } func (m *CriuResp) GetRestore() *CriuRestoreResp { if m != nil { return m.Restore } return nil } func (m *CriuResp) GetNotify() *CriuNotify { if m != nil { return m.Notify } return nil } func (m *CriuResp) GetPs() *CriuPageServerInfo { if m != nil { return m.Ps } return nil } func (m *CriuResp) GetCrErrno() int32 { if m != nil && m.CrErrno != nil { return *m.CrErrno } return 0 } func (m *CriuResp) GetFeatures() *CriuFeatures { if m != nil { return m.Features } return nil } func init() { proto.RegisterEnum("CriuCgMode", CriuCgMode_name, CriuCgMode_value) proto.RegisterEnum("CriuReqType", CriuReqType_name, CriuReqType_value) } docker-runc-tags-docker-1.13.1/libcontainer/criurpc/criurpc.proto000066400000000000000000000073641304443252500251020ustar00rootroot00000000000000message criu_page_server_info { optional string address = 1; optional int32 port = 2; optional int32 pid = 3; optional int32 fd = 4; } message criu_veth_pair { required string if_in = 1; required string if_out = 2; }; message ext_mount_map { required string key = 1; required string val = 2; }; message inherit_fd { required string key = 1; required int32 fd = 2; }; message cgroup_root { optional string ctrl = 1; required string path = 2; }; message unix_sk { required uint32 inode = 1; }; enum criu_cg_mode { IGNORE = 0; NONE = 1; PROPS = 2; SOFT = 3; FULL = 4; STRICT = 5; DEFAULT = 6; }; message criu_opts { required int32 images_dir_fd = 1; optional int32 pid = 2; /* if not set on 
dump, will dump requesting process */ optional bool leave_running = 3; optional bool ext_unix_sk = 4; optional bool tcp_established = 5; optional bool evasive_devices = 6; optional bool shell_job = 7; optional bool file_locks = 8; optional int32 log_level = 9 [default = 2]; optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */ optional criu_page_server_info ps = 11; optional bool notify_scripts = 12; optional string root = 13; optional string parent_img = 14; optional bool track_mem = 15; optional bool auto_dedup = 16; optional int32 work_dir_fd = 17; optional bool link_remap = 18; repeated criu_veth_pair veths = 19; optional uint32 cpu_cap = 20 [default = 0xffffffff]; optional bool force_irmap = 21; repeated string exec_cmd = 22; repeated ext_mount_map ext_mnt = 23; optional bool manage_cgroups = 24; /* backward compatibility */ repeated cgroup_root cg_root = 25; optional bool rst_sibling = 26; /* swrk only */ repeated inherit_fd inherit_fd = 27; /* swrk only */ optional bool auto_ext_mnt = 28; optional bool ext_sharing = 29; optional bool ext_masters = 30; repeated string skip_mnt = 31; repeated string enable_fs = 32; repeated unix_sk unix_sk_ino = 33; optional criu_cg_mode manage_cgroups_mode = 34; optional uint32 ghost_limit = 35 [default = 0x100000]; repeated string irmap_scan_paths = 36; repeated string external = 37; optional uint32 empty_ns = 38; optional bool no_seccomp = 39; } message criu_dump_resp { optional bool restored = 1; } message criu_restore_resp { required int32 pid = 1; } message criu_notify { optional string script = 1; optional int32 pid = 2; } enum criu_req_type { EMPTY = 0; DUMP = 1; RESTORE = 2; CHECK = 3; PRE_DUMP = 4; PAGE_SERVER = 5; NOTIFY = 6; CPUINFO_DUMP = 7; CPUINFO_CHECK = 8; FEATURE_CHECK = 9; } /* * List of features which can queried via * CRIU_REQ_TYPE__FEATURE_CHECK */ message criu_features { optional bool mem_track = 1; } /* * Request -- each type corresponds to must-be-there * request 
arguments of respective type */ message criu_req { required criu_req_type type = 1; optional criu_opts opts = 2; optional bool notify_success = 3; /* * When set service won't close the connection but * will wait for more req-s to appear. Works not * for all request types. */ optional bool keep_open = 4; /* * 'features' can be used to query which features * are supported by the installed criu/kernel * via RPC. */ optional criu_features features = 5; } /* * Response -- it states whether the request was served * and additional request-specific information */ message criu_resp { required criu_req_type type = 1; required bool success = 2; optional criu_dump_resp dump = 3; optional criu_restore_resp restore = 4; optional criu_notify notify = 5; optional criu_page_server_info ps = 6; optional int32 cr_errno = 7; optional criu_features features = 8; } docker-runc-tags-docker-1.13.1/libcontainer/devices/000077500000000000000000000000001304443252500223075ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/devices/devices_test.go000066400000000000000000000025041304443252500253200ustar00rootroot00000000000000// +build linux freebsd package devices import ( "errors" "os" "testing" ) func TestDeviceFromPathLstatFailure(t *testing.T) { testError := errors.New("test error") // Override os.Lstat to inject error. osLstat = func(path string) (os.FileInfo, error) { return nil, testError } _, err := DeviceFromPath("", "") if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } func TestHostDevicesIoutilReadDirFailure(t *testing.T) { testError := errors.New("test error") // Override ioutil.ReadDir to inject error. 
ioutilReadDir = func(dirname string) ([]os.FileInfo, error) { return nil, testError } _, err := HostDevices() if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } func TestHostDevicesIoutilReadDirDeepFailure(t *testing.T) { testError := errors.New("test error") called := false // Override ioutil.ReadDir to inject error after the first call. ioutilReadDir = func(dirname string) ([]os.FileInfo, error) { if called { return nil, testError } called = true // Provoke a second call. fi, err := os.Lstat("/tmp") if err != nil { t.Fatalf("Unexpected error %v", err) } return []os.FileInfo{fi}, nil } _, err := HostDevices() if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } docker-runc-tags-docker-1.13.1/libcontainer/devices/devices_unix.go000066400000000000000000000043071304443252500253270ustar00rootroot00000000000000// +build linux freebsd package devices import ( "errors" "fmt" "io/ioutil" "os" "path/filepath" "syscall" "github.com/opencontainers/runc/libcontainer/configs" ) var ( ErrNotADevice = errors.New("not a device node") ) // Testing dependencies var ( osLstat = os.Lstat ioutilReadDir = ioutil.ReadDir ) // Given the path to a device and it's cgroup_permissions(which cannot be easily queried) look up the information about a linux device and return that information as a Device struct. 
func DeviceFromPath(path, permissions string) (*configs.Device, error) { fileInfo, err := osLstat(path) if err != nil { return nil, err } var ( devType rune mode = fileInfo.Mode() fileModePermissionBits = os.FileMode.Perm(mode) ) switch { case mode&os.ModeDevice == 0: return nil, ErrNotADevice case mode&os.ModeCharDevice != 0: fileModePermissionBits |= syscall.S_IFCHR devType = 'c' default: fileModePermissionBits |= syscall.S_IFBLK devType = 'b' } stat_t, ok := fileInfo.Sys().(*syscall.Stat_t) if !ok { return nil, fmt.Errorf("cannot determine the device number for device %s", path) } devNumber := int(stat_t.Rdev) return &configs.Device{ Type: devType, Path: path, Major: Major(devNumber), Minor: Minor(devNumber), Permissions: permissions, FileMode: fileModePermissionBits, Uid: stat_t.Uid, Gid: stat_t.Gid, }, nil } func HostDevices() ([]*configs.Device, error) { return getDevices("/dev") } func getDevices(path string) ([]*configs.Device, error) { files, err := ioutilReadDir(path) if err != nil { return nil, err } out := []*configs.Device{} for _, f := range files { switch { case f.IsDir(): switch f.Name() { case "pts", "shm", "fd", "mqueue": continue default: sub, err := getDevices(filepath.Join(path, f.Name())) if err != nil { return nil, err } out = append(out, sub...) continue } case f.Name() == "console": continue } device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") if err != nil { if err == ErrNotADevice { continue } return nil, err } out = append(out, device) } return out, nil } docker-runc-tags-docker-1.13.1/libcontainer/devices/devices_unsupported.go000066400000000000000000000000431304443252500267250ustar00rootroot00000000000000// +build windows package devices docker-runc-tags-docker-1.13.1/libcontainer/devices/number.go000066400000000000000000000016041304443252500241270ustar00rootroot00000000000000// +build linux freebsd package devices /* This code provides support for manipulating linux device numbers. 
It should be replaced by normal syscall functions once http://code.google.com/p/go/issues/detail?id=8106 is solved. You can read what they are here: - http://www.makelinux.net/ldd3/chp-3-sect-2 - http://www.linux-tutorial.info/modules.php?name=MContent&pageid=94 Note! These are NOT the same as the MAJOR(dev_t device);, MINOR(dev_t device); and MKDEV(int major, int minor); functions as defined in as the representation of device numbers used by go is different than the one used internally to the kernel! - https://github.com/torvalds/linux/blob/master/include/linux/kdev_t.h#L9 */ func Major(devNumber int) int64 { return int64((devNumber >> 8) & 0xfff) } func Minor(devNumber int) int64 { return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00)) } docker-runc-tags-docker-1.13.1/libcontainer/error.go000066400000000000000000000025441304443252500223520ustar00rootroot00000000000000package libcontainer import "io" // ErrorCode is the API error code type. type ErrorCode int // API error codes. const ( // Factory errors IdInUse ErrorCode = iota InvalidIdFormat // Container errors ContainerNotExists ContainerPaused ContainerNotStopped ContainerNotRunning ContainerNotPaused // Process errors NoProcessOps // Common errors ConfigInvalid ConsoleExists SystemError ) func (c ErrorCode) String() string { switch c { case IdInUse: return "Id already in use" case InvalidIdFormat: return "Invalid format" case ContainerPaused: return "Container paused" case ConfigInvalid: return "Invalid configuration" case SystemError: return "System error" case ContainerNotExists: return "Container does not exist" case ContainerNotStopped: return "Container is not stopped" case ContainerNotRunning: return "Container is not running" case ConsoleExists: return "Console exists for process" case ContainerNotPaused: return "Container is not paused" case NoProcessOps: return "No process operations" default: return "Unknown error" } } // Error is the API error type. 
type Error interface { error // Returns a verbose string including the error message // and a representation of the stack trace suitable for // printing. Detail(w io.Writer) error // Returns the error code for this error. Code() ErrorCode } docker-runc-tags-docker-1.13.1/libcontainer/error_test.go000066400000000000000000000014331304443252500234050ustar00rootroot00000000000000package libcontainer import "testing" func TestErrorCode(t *testing.T) { codes := map[ErrorCode]string{ IdInUse: "Id already in use", InvalidIdFormat: "Invalid format", ContainerPaused: "Container paused", ConfigInvalid: "Invalid configuration", SystemError: "System error", ContainerNotExists: "Container does not exist", ContainerNotStopped: "Container is not stopped", ContainerNotRunning: "Container is not running", ConsoleExists: "Console exists for process", ContainerNotPaused: "Container is not paused", NoProcessOps: "No process operations", } for code, expected := range codes { if actual := code.String(); actual != expected { t.Fatalf("expected string %q but received %q", expected, actual) } } } docker-runc-tags-docker-1.13.1/libcontainer/factory.go000066400000000000000000000026711304443252500226710ustar00rootroot00000000000000package libcontainer import ( "github.com/opencontainers/runc/libcontainer/configs" ) type Factory interface { // Creates a new container with the given id and starts the initial process inside it. // id must be a string containing only letters, digits and underscores and must contain // between 1 and 1024 characters, inclusive. // // The id must not already be in use by an existing container. Containers created using // a factory with the same path (and file system) must have distinct ids. // // Returns the new container with a running process. 
// // errors: // IdInUse - id is already in use by a container // InvalidIdFormat - id has incorrect format // ConfigInvalid - config is invalid // Systemerror - System error // // On error, any partially created container parts are cleaned up (the operation is atomic). Create(id string, config *configs.Config) (Container, error) // Load takes an ID for an existing container and returns the container information // from the state. This presents a read only view of the container. // // errors: // Path does not exist // Container is stopped // System error Load(id string) (Container, error) // StartInitialization is an internal API to libcontainer used during the reexec of the // container. // // Errors: // Pipe connection error // System error StartInitialization() error // Type returns info string about factory type (e.g. lxc, libcontainer...) Type() string } docker-runc-tags-docker-1.13.1/libcontainer/factory_linux.go000066400000000000000000000210071304443252500241020ustar00rootroot00000000000000// +build linux package libcontainer import ( "encoding/json" "fmt" "os" "path/filepath" "regexp" "runtime/debug" "strconv" "syscall" "github.com/docker/docker/pkg/mount" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs/validate" "github.com/opencontainers/runc/libcontainer/utils" ) const ( stateFilename = "state.json" execFifoFilename = "exec.fifo" ) var ( idRegex = regexp.MustCompile(`^[\w+-\.]+$`) maxIdLen = 1024 ) // InitArgs returns an options func to configure a LinuxFactory with the // provided init binary path and arguments. 
func InitArgs(args ...string) func(*LinuxFactory) error { return func(l *LinuxFactory) error { l.InitArgs = args return nil } } // SystemdCgroups is an options func to configure a LinuxFactory to return // containers that use systemd to create and manage cgroups. func SystemdCgroups(l *LinuxFactory) error { l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { return &systemd.Manager{ Cgroups: config, Paths: paths, } } return nil } // Cgroupfs is an options func to configure a LinuxFactory to return // containers that use the native cgroups filesystem implementation to // create and manage cgroups. func Cgroupfs(l *LinuxFactory) error { l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { return &fs.Manager{ Cgroups: config, Paths: paths, } } return nil } // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. func TmpfsRoot(l *LinuxFactory) error { mounted, err := mount.Mounted(l.Root) if err != nil { return err } if !mounted { if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil { return err } } return nil } // CriuPath returns an option func to configure a LinuxFactory with the // provided criupath func CriuPath(criupath string) func(*LinuxFactory) error { return func(l *LinuxFactory) error { l.CriuPath = criupath return nil } } // New returns a linux based container factory based in the root directory and // configures the factory with the provided option funcs. 
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { if root != "" { if err := os.MkdirAll(root, 0700); err != nil { return nil, newGenericError(err, SystemError) } } l := &LinuxFactory{ Root: root, InitArgs: []string{"/proc/self/exe", "init"}, Validator: validate.New(), CriuPath: "criu", } Cgroupfs(l) for _, opt := range options { if err := opt(l); err != nil { return nil, err } } return l, nil } // LinuxFactory implements the default factory interface for linux based systems. type LinuxFactory struct { // Root directory for the factory to store state. Root string // InitArgs are arguments for calling the init responsibilities for spawning // a container. InitArgs []string // CriuPath is the path to the criu binary used for checkpoint and restore of // containers. CriuPath string // Validator provides validation to container configurations. Validator validate.Validator // NewCgroupsManager returns an initialized cgroups manager for a single container. NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager } func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { if l.Root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } if err := l.validateID(id); err != nil { return nil, err } if err := l.Validator.Validate(config); err != nil { return nil, newGenericError(err, ConfigInvalid) } uid, err := config.HostUID() if err != nil { return nil, newGenericError(err, SystemError) } gid, err := config.HostGID() if err != nil { return nil, newGenericError(err, SystemError) } containerRoot := filepath.Join(l.Root, id) if _, err := os.Stat(containerRoot); err == nil { return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse) } else if !os.IsNotExist(err) { return nil, newGenericError(err, SystemError) } if err := os.MkdirAll(containerRoot, 0711); err != nil { return nil, newGenericError(err, SystemError) } if err := 
os.Chown(containerRoot, uid, gid); err != nil { return nil, newGenericError(err, SystemError) } fifoName := filepath.Join(containerRoot, execFifoFilename) oldMask := syscall.Umask(0000) if err := syscall.Mkfifo(fifoName, 0622); err != nil { syscall.Umask(oldMask) return nil, newGenericError(err, SystemError) } syscall.Umask(oldMask) if err := os.Chown(fifoName, uid, gid); err != nil { return nil, newGenericError(err, SystemError) } c := &linuxContainer{ id: id, root: containerRoot, config: config, initArgs: l.InitArgs, criuPath: l.CriuPath, cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), } c.state = &stoppedState{c: c} return c, nil } func (l *LinuxFactory) Load(id string) (Container, error) { if l.Root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } containerRoot := filepath.Join(l.Root, id) state, err := l.loadState(containerRoot, id) if err != nil { return nil, err } r := &nonChildProcess{ processPid: state.InitProcessPid, processStartTime: state.InitProcessStartTime, fds: state.ExternalDescriptors, } c := &linuxContainer{ initProcess: r, initProcessStartTime: state.InitProcessStartTime, id: id, config: &state.Config, initArgs: l.InitArgs, criuPath: l.CriuPath, cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), root: containerRoot, created: state.Created, } c.state = &loadedState{c: c} if err := c.refreshState(); err != nil { return nil, err } return c, nil } func (l *LinuxFactory) Type() string { return "libcontainer" } // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally func (l *LinuxFactory) StartInitialization() (err error) { var pipefd, rootfd int for _, pair := range []struct { k string v *int }{ {"_LIBCONTAINER_INITPIPE", &pipefd}, {"_LIBCONTAINER_STATEDIR", &rootfd}, } { s := os.Getenv(pair.k) i, err := strconv.Atoi(s) if err != 
nil { return fmt.Errorf("unable to convert %s=%s to int", pair.k, s) } *pair.v = i } var ( pipe = os.NewFile(uintptr(pipefd), "pipe") it = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) ) // clear the current process's environment to clean any libcontainer // specific env vars. os.Clearenv() var i initer defer func() { // We have an error during the initialization of the container's init, // send it back to the parent process in the form of an initError. // If container's init successed, syscall.Exec will not return, hence // this defer function will never be called. if _, ok := i.(*linuxStandardInit); ok { // Synchronisation only necessary for standard init. if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil { fmt.Fprintln(os.Stderr, err) return } } if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil { fmt.Fprintln(os.Stderr, err) return } // ensure that this pipe is always closed pipe.Close() }() defer func() { if e := recover(); e != nil { err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack())) } }() i, err = newContainerInit(it, pipe, rootfd) if err != nil { return err } return i.Init() } func (l *LinuxFactory) loadState(root, id string) (*State, error) { f, err := os.Open(filepath.Join(root, stateFilename)) if err != nil { if os.IsNotExist(err) { return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists) } return nil, newGenericError(err, SystemError) } defer f.Close() var state *State if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, newGenericError(err, SystemError) } return state, nil } func (l *LinuxFactory) validateID(id string) error { if !idRegex.MatchString(id) { return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat) } if len(id) > maxIdLen { return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat) } return nil } 
docker-runc-tags-docker-1.13.1/libcontainer/factory_linux_test.go000066400000000000000000000117651304443252500251530ustar00rootroot00000000000000// +build linux package libcontainer import ( "io/ioutil" "os" "path/filepath" "reflect" "syscall" "testing" "github.com/docker/docker/pkg/mount" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/utils" ) func newTestRoot() (string, error) { dir, err := ioutil.TempDir("", "libcontainer") if err != nil { return "", err } return dir, nil } func TestFactoryNew(t *testing.T) { root, rerr := newTestRoot() if rerr != nil { t.Fatal(rerr) } defer os.RemoveAll(root) factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } if factory == nil { t.Fatal("factory should not be nil") } lfactory, ok := factory.(*LinuxFactory) if !ok { t.Fatal("expected linux factory returned on linux based systems") } if lfactory.Root != root { t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) } if factory.Type() != "libcontainer" { t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") } } func TestFactoryNewTmpfs(t *testing.T) { root, rerr := newTestRoot() if rerr != nil { t.Fatal(rerr) } defer os.RemoveAll(root) factory, err := New(root, Cgroupfs, TmpfsRoot) if err != nil { t.Fatal(err) } if factory == nil { t.Fatal("factory should not be nil") } lfactory, ok := factory.(*LinuxFactory) if !ok { t.Fatal("expected linux factory returned on linux based systems") } if lfactory.Root != root { t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) } if factory.Type() != "libcontainer" { t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") } mounted, err := mount.Mounted(lfactory.Root) if err != nil { t.Fatal(err) } if !mounted { t.Fatalf("Factory Root is not mounted") } mounts, err := mount.GetMounts() if err != nil { t.Fatal(err) } var found bool for _, m := range mounts { if 
m.Mountpoint == lfactory.Root { if m.Fstype != "tmpfs" { t.Fatalf("Fstype of root: %s, expected %s", m.Fstype, "tmpfs") } if m.Source != "tmpfs" { t.Fatalf("Source of root: %s, expected %s", m.Source, "tmpfs") } found = true } } if !found { t.Fatalf("Factory Root is not listed in mounts list") } defer syscall.Unmount(root, syscall.MNT_DETACH) } func TestFactoryLoadNotExists(t *testing.T) { root, rerr := newTestRoot() if rerr != nil { t.Fatal(rerr) } defer os.RemoveAll(root) factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } _, err = factory.Load("nocontainer") if err == nil { t.Fatal("expected nil error loading non-existing container") } lerr, ok := err.(Error) if !ok { t.Fatal("expected libcontainer error type") } if lerr.Code() != ContainerNotExists { t.Fatalf("expected error code %s but received %s", ContainerNotExists, lerr.Code()) } } func TestFactoryLoadContainer(t *testing.T) { root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) // setup default container config and state for mocking var ( id = "1" expectedHooks = &configs.Hooks{ Prestart: []configs.Hook{ configs.CommandHook{Command: configs.Command{Path: "prestart-hook"}}, }, Poststart: []configs.Hook{ configs.CommandHook{Command: configs.Command{Path: "poststart-hook"}}, }, Poststop: []configs.Hook{ unserializableHook{}, configs.CommandHook{Command: configs.Command{Path: "poststop-hook"}}, }, } expectedConfig = &configs.Config{ Rootfs: "/mycontainer/root", Hooks: expectedHooks, } expectedState = &State{ BaseState: BaseState{ InitProcessPid: 1024, Config: *expectedConfig, }, } ) if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil { t.Fatal(err) } if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil { t.Fatal(err) } factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } container, err := factory.Load(id) if err != nil { t.Fatal(err) } if container.ID() != id { t.Fatalf("expected container id %q but received 
%q", id, container.ID()) } config := container.Config() if config.Rootfs != expectedConfig.Rootfs { t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs) } expectedHooks.Poststop = expectedHooks.Poststop[1:] // expect unserializable hook to be skipped if !reflect.DeepEqual(config.Hooks, expectedHooks) { t.Fatalf("expects hooks %q but received %q", expectedHooks, config.Hooks) } lcontainer, ok := container.(*linuxContainer) if !ok { t.Fatal("expected linux container on linux based systems") } if lcontainer.initProcess.pid() != expectedState.InitProcessPid { t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, lcontainer.initProcess.pid()) } } func marshal(path string, v interface{}) error { f, err := os.Create(path) if err != nil { return err } defer f.Close() return utils.WriteJSON(f, v) } type unserializableHook struct{} func (unserializableHook) Run(configs.HookState) error { return nil } docker-runc-tags-docker-1.13.1/libcontainer/generic_error.go000066400000000000000000000041741304443252500240470ustar00rootroot00000000000000package libcontainer import ( "fmt" "io" "text/template" "time" "github.com/opencontainers/runc/libcontainer/stacktrace" ) type syncType uint8 const ( procReady syncType = iota procError procRun procHooks procResume ) type syncT struct { Type syncType `json:"type"` } var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} Code: {{.ECode}} {{if .Message }} Message: {{.Message}} {{end}} Frames:{{range $i, $frame := .Stack.Frames}} --- {{$i}}: {{$frame.Function}} Package: {{$frame.Package}} File: {{$frame.File}}@{{$frame.Line}}{{end}} `)) func newGenericError(err error, c ErrorCode) Error { if le, ok := err.(Error); ok { return le } gerr := &genericError{ Timestamp: time.Now(), Err: err, ECode: c, Stack: stacktrace.Capture(1), } if err != nil { gerr.Message = err.Error() } return gerr } func newSystemError(err error) Error { return createSystemError(err, 
"") } func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error { return createSystemError(err, fmt.Sprintf(cause, v...)) } func newSystemErrorWithCause(err error, cause string) Error { return createSystemError(err, cause) } // createSystemError creates the specified error with the correct number of // stack frames skipped. This is only to be called by the other functions for // formatting the error. func createSystemError(err error, cause string) Error { gerr := &genericError{ Timestamp: time.Now(), Err: err, ECode: SystemError, Cause: cause, Stack: stacktrace.Capture(2), } if err != nil { gerr.Message = err.Error() } return gerr } type genericError struct { Timestamp time.Time ECode ErrorCode Err error `json:"-"` Cause string Message string Stack stacktrace.Stacktrace } func (e *genericError) Error() string { if e.Cause == "" { return e.Message } frame := e.Stack.Frames[0] return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message) } func (e *genericError) Code() ErrorCode { return e.ECode } func (e *genericError) Detail(w io.Writer) error { return errorTemplate.Execute(w, e) } docker-runc-tags-docker-1.13.1/libcontainer/generic_error_test.go000066400000000000000000000003601304443252500250770ustar00rootroot00000000000000package libcontainer import ( "fmt" "io/ioutil" "testing" ) func TestErrorDetail(t *testing.T) { err := newGenericError(fmt.Errorf("test error"), SystemError) if derr := err.Detail(ioutil.Discard); derr != nil { t.Fatal(derr) } } docker-runc-tags-docker-1.13.1/libcontainer/init_linux.go000066400000000000000000000235451304443252500234070ustar00rootroot00000000000000// +build linux package libcontainer import ( "encoding/json" "fmt" "io" "io/ioutil" "net" "os" "strconv" "strings" "syscall" "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" 
"github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/utils" "github.com/vishvananda/netlink" ) type initType string const ( initSetns initType = "setns" initStandard initType = "standard" ) type pid struct { Pid int `json:"pid"` } // network is an internal struct used to setup container networks. type network struct { configs.Network // TempVethPeerName is a unique temporary veth peer name that was placed into // the container's namespace. TempVethPeerName string `json:"temp_veth_peer_name"` } // initConfig is used for transferring parameters from Exec() to Init() type initConfig struct { Args []string `json:"args"` Env []string `json:"env"` Cwd string `json:"cwd"` Capabilities []string `json:"capabilities"` ProcessLabel string `json:"process_label"` AppArmorProfile string `json:"apparmor_profile"` NoNewPrivileges bool `json:"no_new_privileges"` User string `json:"user"` AdditionalGroups []string `json:"additional_groups"` Config *configs.Config `json:"config"` Console string `json:"console"` Networks []*network `json:"network"` PassedFilesCount int `json:"passed_files_count"` ContainerId string `json:"containerid"` Rlimits []configs.Rlimit `json:"rlimits"` ExecFifoPath string `json:"start_pipe_path"` } type initer interface { Init() error } func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error) { var config *initConfig if err := json.NewDecoder(pipe).Decode(&config); err != nil { return nil, err } if err := populateProcessEnvironment(config.Env); err != nil { return nil, err } switch t { case initSetns: return &linuxSetnsInit{ config: config, stateDirFD: stateDirFD, }, nil case initStandard: return &linuxStandardInit{ pipe: pipe, parentPid: syscall.Getppid(), config: config, stateDirFD: stateDirFD, }, nil } return nil, fmt.Errorf("unknown init type %q", t) } // populateProcessEnvironment loads the provided environment variables into the // current processes's environment. 
func populateProcessEnvironment(env []string) error { for _, pair := range env { p := strings.SplitN(pair, "=", 2) if len(p) < 2 { return fmt.Errorf("invalid environment '%v'", pair) } if err := os.Setenv(p[0], p[1]); err != nil { return err } } return nil } // finalizeNamespace drops the caps, sets the correct user // and working dir, and closes any leaked file descriptors // before executing the command inside the namespace func finalizeNamespace(config *initConfig) error { // Ensure that all unwanted fds we may have accidentally // inherited are marked close-on-exec so they stay out of the // container if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil { return err } capabilities := config.Config.Capabilities if config.Capabilities != nil { capabilities = config.Capabilities } w, err := newCapWhitelist(capabilities) if err != nil { return err } // drop capabilities in bounding set before changing user if err := w.dropBoundingSet(); err != nil { return err } // preserve existing capabilities while we change users if err := system.SetKeepCaps(); err != nil { return err } if err := setupUser(config); err != nil { return err } if err := system.ClearKeepCaps(); err != nil { return err } // drop all other capabilities if err := w.drop(); err != nil { return err } if config.Cwd != "" { if err := syscall.Chdir(config.Cwd); err != nil { return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err) } } return nil } // syncParentReady sends to the given pipe a JSON payload which indicates that // the init is ready to Exec the child process. It then waits for the parent to // indicate that it is cleared to Exec. func syncParentReady(pipe io.ReadWriter) error { // Tell parent. if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil { return err } // Wait for parent to give the all-clear. 
var procSync syncT if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { if err == io.EOF { return fmt.Errorf("parent closed synchronisation channel") } if procSync.Type != procRun { return fmt.Errorf("invalid synchronisation flag from parent") } } return nil } // syncParentHooks sends to the given pipe a JSON payload which indicates that // the parent should execute pre-start hooks. It then waits for the parent to // indicate that it is cleared to resume. func syncParentHooks(pipe io.ReadWriter) error { // Tell parent. if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil { return err } // Wait for parent to give the all-clear. var procSync syncT if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { if err == io.EOF { return fmt.Errorf("parent closed synchronisation channel") } if procSync.Type != procResume { return fmt.Errorf("invalid synchronisation flag from parent") } } return nil } // setupUser changes the groups, gid, and uid for the user inside the container func setupUser(config *initConfig) error { // Set up defaults. defaultExecUser := user.ExecUser{ Uid: syscall.Getuid(), Gid: syscall.Getgid(), Home: "/", } passwdPath, err := user.GetPasswdPath() if err != nil { return err } groupPath, err := user.GetGroupPath() if err != nil { return err } execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) if err != nil { return err } var addGroups []int if len(config.AdditionalGroups) > 0 { addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath) if err != nil { return err } } // before we change to the container's user make sure that the processes STDIO // is correctly owned by the user that we are switching to. if err := fixStdioPermissions(execUser); err != nil { return err } suppGroups := append(execUser.Sgids, addGroups...) 
if err := syscall.Setgroups(suppGroups); err != nil { return err } if err := system.Setgid(execUser.Gid); err != nil { return err } if err := system.Setuid(execUser.Uid); err != nil { return err } // if we didn't get HOME already, set it based on the user's HOME if envHome := os.Getenv("HOME"); envHome == "" { if err := os.Setenv("HOME", execUser.Home); err != nil { return err } } return nil } // fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. // The ownership needs to match because it is created outside of the container and needs to be // localized. func fixStdioPermissions(u *user.ExecUser) error { var null syscall.Stat_t if err := syscall.Stat("/dev/null", &null); err != nil { return err } for _, fd := range []uintptr{ os.Stdin.Fd(), os.Stderr.Fd(), os.Stdout.Fd(), } { var s syscall.Stat_t if err := syscall.Fstat(int(fd), &s); err != nil { return err } // skip chown of /dev/null if it was used as one of the STDIO fds. if s.Rdev == null.Rdev { continue } if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil { return err } } return nil } // setupNetwork sets up and initializes any network interface inside the container. 
func setupNetwork(config *initConfig) error { for _, config := range config.Networks { strategy, err := getStrategy(config.Type) if err != nil { return err } if err := strategy.initialize(config); err != nil { return err } } return nil } func setupRoute(config *configs.Config) error { for _, config := range config.Routes { _, dst, err := net.ParseCIDR(config.Destination) if err != nil { return err } src := net.ParseIP(config.Source) if src == nil { return fmt.Errorf("Invalid source for route: %s", config.Source) } gw := net.ParseIP(config.Gateway) if gw == nil { return fmt.Errorf("Invalid gateway for route: %s", config.Gateway) } l, err := netlink.LinkByName(config.InterfaceName) if err != nil { return err } route := &netlink.Route{ Scope: netlink.SCOPE_UNIVERSE, Dst: dst, Src: src, Gw: gw, LinkIndex: l.Attrs().Index, } if err := netlink.RouteAdd(route); err != nil { return err } } return nil } func setupRlimits(limits []configs.Rlimit, pid int) error { for _, rlimit := range limits { if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil { return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) } } return nil } func setOomScoreAdj(oomScoreAdj int, pid int) error { path := fmt.Sprintf("/proc/%d/oom_score_adj", pid) return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600) } // signalAllProcesses freezes then iterates over all the processes inside the // manager's cgroups sending a SIGKILL to each process then waiting for them to // exit. 
func signalAllProcesses(m cgroups.Manager, s os.Signal) error { var procs []*os.Process if err := m.Freeze(configs.Frozen); err != nil { logrus.Warn(err) } pids, err := m.GetAllPids() if err != nil { m.Freeze(configs.Thawed) return err } for _, pid := range pids { p, err := os.FindProcess(pid) if err != nil { logrus.Warn(err) continue } procs = append(procs, p) if err := p.Signal(s); err != nil { logrus.Warn(err) } } if err := m.Freeze(configs.Thawed); err != nil { logrus.Warn(err) } for _, p := range procs { if _, err := p.Wait(); err != nil { logrus.Warn(err) } } return nil } docker-runc-tags-docker-1.13.1/libcontainer/integration/000077500000000000000000000000001304443252500232105ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/integration/checkpoint_test.go000066400000000000000000000067501304443252500267350ustar00rootroot00000000000000package integration import ( "bufio" "bytes" "io/ioutil" "os" "path/filepath" "strings" "syscall" "testing" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" ) func showFile(t *testing.T, fname string) error { t.Logf("=== %s ===\n", fname) f, err := os.Open(fname) if err != nil { t.Log(err) return err } defer f.Close() scanner := bufio.NewScanner(f) for scanner.Scan() { t.Log(scanner.Text()) } if err := scanner.Err(); err != nil { return err } t.Logf("=== END ===\n") return nil } func TestCheckpoint(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) config.Mounts = append(config.Mounts, &configs.Mount{ Destination: "/sys/fs/cgroup", Device: "cgroup", Flags: defaultMountFlags | syscall.MS_RDONLY, }) factory, err := libcontainer.New(root, libcontainer.Cgroupfs) if err != nil { t.Fatal(err) } container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } 
defer container.Destroy() stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } var stdout bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, Stdout: &stdout, } err = container.Run(&pconfig) stdinR.Close() defer stdinW.Close() if err != nil { t.Fatal(err) } pid, err := pconfig.Pid() if err != nil { t.Fatal(err) } process, err := os.FindProcess(pid) if err != nil { t.Fatal(err) } imagesDir, err := ioutil.TempDir("", "criu") if err != nil { t.Fatal(err) } defer os.RemoveAll(imagesDir) checkpointOpts := &libcontainer.CriuOpts{ ImagesDirectory: imagesDir, WorkDirectory: imagesDir, } dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log") restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log") if err := container.Checkpoint(checkpointOpts); err != nil { showFile(t, dumpLog) t.Fatal(err) } state, err := container.Status() if err != nil { t.Fatal(err) } if state != libcontainer.Running { t.Fatal("Unexpected state checkpoint: ", state) } stdinW.Close() _, err = process.Wait() if err != nil { t.Fatal(err) } // reload the container container, err = factory.Load("test") if err != nil { t.Fatal(err) } restoreStdinR, restoreStdinW, err := os.Pipe() if err != nil { t.Fatal(err) } restoreProcessConfig := &libcontainer.Process{ Cwd: "/", Stdin: restoreStdinR, Stdout: &stdout, } err = container.Restore(restoreProcessConfig, checkpointOpts) restoreStdinR.Close() defer restoreStdinW.Close() if err != nil { showFile(t, restoreLog) t.Fatal(err) } state, err = container.Status() if err != nil { t.Fatal(err) } if state != libcontainer.Running { t.Fatal("Unexpected restore state: ", state) } pid, err = restoreProcessConfig.Pid() if err != nil { t.Fatal(err) } process, err = os.FindProcess(pid) if err != nil { t.Fatal(err) } _, err = restoreStdinW.WriteString("Hello!") if err != nil { t.Fatal(err) } restoreStdinW.Close() s, err := process.Wait() if err != nil { t.Fatal(err) } if 
!s.Success() { t.Fatal(s.String(), pid) } output := string(stdout.Bytes()) if !strings.Contains(output, "Hello!") { t.Fatal("Did not restore the pipe correctly:", output) } } docker-runc-tags-docker-1.13.1/libcontainer/integration/doc.go000066400000000000000000000001231304443252500243000ustar00rootroot00000000000000// integration is used for integration testing of libcontainer package integration docker-runc-tags-docker-1.13.1/libcontainer/integration/exec_test.go000066400000000000000000001177071304443252500255370ustar00rootroot00000000000000package integration import ( "bytes" "fmt" "io/ioutil" "os" "os/exec" "path/filepath" "reflect" "strconv" "strings" "syscall" "testing" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/configs" ) func TestExecPS(t *testing.T) { testExecPS(t, false) } func TestUsernsExecPS(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } testExecPS(t, true) } func testExecPS(t *testing.T, userns bool) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) if userns { config.UidMappings = []configs.IDMap{{0, 0, 1000}} config.GidMappings = []configs.IDMap{{0, 0, 1000}} config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) } buffers, exitCode, err := runContainer(config, "", "ps") if err != nil { t.Fatalf("%s: %s", buffers, err) } if exitCode != 0 { t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) } lines := strings.Split(buffers.Stdout.String(), "\n") if len(lines) < 2 { t.Fatalf("more than one process running for output %q", buffers.Stdout.String()) } expected := `1 root ps` actual := strings.Trim(lines[1], "\n ") if actual != expected { t.Fatalf("expected output %q but received %q", expected, actual) } } func TestIPCPrivate(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/ipc") ok(t, err) config := newTemplateConfig(rootfs) buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") ok(t, err) if exitCode != 0 { t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) } if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l { t.Fatalf("ipc link should be private to the container but equals host %q %q", actual, l) } } func TestIPCHost(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/ipc") ok(t, err) config := newTemplateConfig(rootfs) config.Namespaces.Remove(configs.NEWIPC) buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") ok(t, err) if exitCode != 0 { t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) } if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { t.Fatalf("ipc link not equal to host link %q %q", actual, l) } } func TestIPCJoinPath(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/ipc") ok(t, err) config := newTemplateConfig(rootfs) config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipc") buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") ok(t, err) if exitCode != 0 { t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) } if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { t.Fatalf("ipc link not equal to host link %q %q", actual, l) } } func TestIPCBadPath(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipcc") _, _, err = runContainer(config, "", "true") if err == nil { t.Fatal("container succeeded with bad ipc path") } } func TestRlimit(t *testing.T) { testRlimit(t, false) } func TestUsernsRlimit(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } testRlimit(t, true) } func testRlimit(t *testing.T, userns bool) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) if userns { config.UidMappings = []configs.IDMap{{0, 0, 1000}} config.GidMappings = []configs.IDMap{{0, 0, 1000}} config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) } // ensure limit is lower than what the config requests to test that in a user namespace // the Setrlimit call happens early enough that we still have permissions to raise the limit. 
ok(t, syscall.Setrlimit(syscall.RLIMIT_NOFILE, &syscall.Rlimit{ Max: 1024, Cur: 1024, })) out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n") ok(t, err) if limit := strings.TrimSpace(out.Stdout.String()); limit != "1025" { t.Fatalf("expected rlimit to be 1025, got %s", limit) } } func TestEnter(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := factory.Create("test", config) ok(t, err) defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() ok(t, err) var stdout, stdout2 bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}, Env: standardEnvironment, Stdin: stdinR, Stdout: &stdout, } err = container.Run(&pconfig) stdinR.Close() defer stdinW.Close() ok(t, err) pid, err := pconfig.Pid() ok(t, err) // Execute another process in the container stdinR2, stdinW2, err := os.Pipe() ok(t, err) pconfig2 := libcontainer.Process{ Cwd: "/", Env: standardEnvironment, } pconfig2.Args = []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"} pconfig2.Stdin = stdinR2 pconfig2.Stdout = &stdout2 err = container.Run(&pconfig2) stdinR2.Close() defer stdinW2.Close() ok(t, err) pid2, err := pconfig2.Pid() ok(t, err) processes, err := container.Processes() ok(t, err) n := 0 for i := range processes { if processes[i] == pid || processes[i] == pid2 { n++ } } if n != 2 { t.Fatal("unexpected number of processes", processes, pid, pid2) } // Wait processes stdinW2.Close() waitProcess(&pconfig2, t) stdinW.Close() waitProcess(&pconfig, t) // Check that both processes live in the same pidns pidns := string(stdout.Bytes()) ok(t, err) pidns2 := string(stdout2.Bytes()) ok(t, err) if pidns != pidns2 { t.Fatal("The second process isn't in the required pid namespace", pidns, pidns2) } } func 
TestProcessEnv(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := factory.Create("test", config) ok(t, err) defer container.Destroy() var stdout bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "env"}, Env: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "HOSTNAME=integration", "TERM=xterm", "FOO=BAR", }, Stdin: nil, Stdout: &stdout, } err = container.Run(&pconfig) ok(t, err) // Wait for process waitProcess(&pconfig, t) outputEnv := string(stdout.Bytes()) // Check that the environment has the key/value pair we added if !strings.Contains(outputEnv, "FOO=BAR") { t.Fatal("Environment doesn't have the expected FOO=BAR key/value pair: ", outputEnv) } // Make sure that HOME is set if !strings.Contains(outputEnv, "HOME=/root") { t.Fatal("Environment doesn't have HOME set: ", outputEnv) } } func TestProcessCaps(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := factory.Create("test", config) ok(t, err) defer container.Destroy() processCaps := append(config.Capabilities, "CAP_NET_ADMIN") var stdout bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "cat /proc/self/status"}, Env: standardEnvironment, Capabilities: processCaps, Stdin: nil, Stdout: &stdout, } err = container.Run(&pconfig) ok(t, err) // Wait for process waitProcess(&pconfig, t) outputStatus := string(stdout.Bytes()) lines := strings.Split(outputStatus, "\n") effectiveCapsLine := "" for _, l := range lines { line := strings.TrimSpace(l) if strings.Contains(line, "CapEff:") { effectiveCapsLine = line break } } if effectiveCapsLine == "" { t.Fatal("Couldn't find effective caps: ", 
outputStatus) } parts := strings.Split(effectiveCapsLine, ":") effectiveCapsStr := strings.TrimSpace(parts[1]) effectiveCaps, err := strconv.ParseUint(effectiveCapsStr, 16, 64) if err != nil { t.Fatal("Could not parse effective caps", err) } var netAdminMask uint64 var netAdminBit uint netAdminBit = 12 // from capability.h netAdminMask = 1 << netAdminBit if effectiveCaps&netAdminMask != netAdminMask { t.Fatal("CAP_NET_ADMIN is not set as expected") } } func TestAdditionalGroups(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) factory, err := libcontainer.New(root, libcontainer.Cgroupfs) ok(t, err) container, err := factory.Create("test", config) ok(t, err) defer container.Destroy() var stdout bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "id", "-Gn"}, Env: standardEnvironment, Stdin: nil, Stdout: &stdout, AdditionalGroups: []string{"plugdev", "audio"}, } err = container.Run(&pconfig) ok(t, err) // Wait for process waitProcess(&pconfig, t) outputGroups := string(stdout.Bytes()) // Check that the groups output has the groups that we specified if !strings.Contains(outputGroups, "audio") { t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups) } if !strings.Contains(outputGroups, "plugdev") { t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups) } } func TestFreeze(t *testing.T) { testFreeze(t, false) } func TestSystemdFreeze(t *testing.T) { if !systemd.UseSystemd() { t.Skip("Systemd is unsupported") } testFreeze(t, true) } func testFreeze(t *testing.T, systemd bool) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) f := factory if systemd { f = systemdFactory } container, err 
:= f.Create("test", config) ok(t, err) defer container.Destroy() stdinR, stdinW, err := os.Pipe() ok(t, err) pconfig := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(pconfig) stdinR.Close() defer stdinW.Close() ok(t, err) err = container.Pause() ok(t, err) state, err := container.Status() ok(t, err) err = container.Resume() ok(t, err) if state != libcontainer.Paused { t.Fatal("Unexpected state: ", state) } stdinW.Close() waitProcess(pconfig, t) } func TestCpuShares(t *testing.T) { testCpuShares(t, false) } func TestCpuSharesSystemd(t *testing.T) { if !systemd.UseSystemd() { t.Skip("Systemd is unsupported") } testCpuShares(t, true) } func testCpuShares(t *testing.T, systemd bool) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) if systemd { config.Cgroups.Parent = "system.slice" } config.Cgroups.Resources.CpuShares = 1 _, _, err = runContainer(config, "", "ps") if err == nil { t.Fatalf("runContainer should failed with invalid CpuShares") } } func TestPids(t *testing.T) { testPids(t, false) } func TestPidsSystemd(t *testing.T) { if !systemd.UseSystemd() { t.Skip("Systemd is unsupported") } testPids(t, true) } func testPids(t *testing.T, systemd bool) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) if systemd { config.Cgroups.Parent = "system.slice" } config.Cgroups.Resources.PidsLimit = -1 // Running multiple processes. _, ret, err := runContainer(config, "", "/bin/sh", "-c", "/bin/true | /bin/true | /bin/true | /bin/true") if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") { t.Skip("PIDs cgroup is unsupported") } ok(t, err) if ret != 0 { t.Fatalf("expected fork() to succeed with no pids limit") } // Enforce a permissive limit. 
This needs to be fairly hand-wavey due to the // issues with running Go binaries with pids restrictions (see below). config.Cgroups.Resources.PidsLimit = 64 _, ret, err = runContainer(config, "", "/bin/sh", "-c", ` /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true`) if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") { t.Skip("PIDs cgroup is unsupported") } ok(t, err) if ret != 0 { t.Fatalf("expected fork() to succeed with permissive pids limit") } // Enforce a restrictive limit. 64 * /bin/true + 1 * shell should cause this // to fail reliability. config.Cgroups.Resources.PidsLimit = 64 out, _, err := runContainer(config, "", "/bin/sh", "-c", ` /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true`) if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") { t.Skip("PIDs cgroup is unsupported") } if err != nil && !strings.Contains(out.String(), "sh: can't fork") { ok(t, err) } if err == nil { 
t.Fatalf("expected fork() to fail with restrictive pids limit") } // Minimal restrictions are not really supported, due to quirks in using Go // due to the fact that it spawns random processes. While we do our best with // late setting cgroup values, it's just too unreliable with very small pids.max. // As such, we don't test that case. YMMV. } func TestRunWithKernelMemory(t *testing.T) { testRunWithKernelMemory(t, false) } func TestRunWithKernelMemorySystemd(t *testing.T) { if !systemd.UseSystemd() { t.Skip("Systemd is unsupported") } testRunWithKernelMemory(t, true) } func testRunWithKernelMemory(t *testing.T, systemd bool) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) if systemd { config.Cgroups.Parent = "system.slice" } config.Cgroups.Resources.KernelMemory = 52428800 _, _, err = runContainer(config, "", "ps") if err != nil { t.Fatalf("runContainer failed with kernel memory limit: %v", err) } } func TestContainerState(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/ipc") if err != nil { t.Fatal(err) } config := newTemplateConfig(rootfs) config.Namespaces = configs.Namespaces([]configs.Namespace{ {Type: configs.NEWNS}, {Type: configs.NEWUTS}, // host for IPC //{Type: configs.NEWIPC}, {Type: configs.NEWPID}, {Type: configs.NEWNET}, }) container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } defer container.Destroy() stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } p := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(p) if err != nil { t.Fatal(err) } stdinR.Close() defer stdinW.Close() st, err := container.State() if err != nil { t.Fatal(err) } l1, err := 
os.Readlink(st.NamespacePaths[configs.NEWIPC]) if err != nil { t.Fatal(err) } if l1 != l { t.Fatal("Container using non-host ipc namespace") } stdinW.Close() waitProcess(p, t) } func TestPassExtraFiles(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } defer container.Destroy() var stdout bytes.Buffer pipeout1, pipein1, err := os.Pipe() pipeout2, pipein2, err := os.Pipe() process := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "cd /proc/$$/fd; echo -n *; echo -n 1 >3; echo -n 2 >4"}, Env: []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}, ExtraFiles: []*os.File{pipein1, pipein2}, Stdin: nil, Stdout: &stdout, } err = container.Run(&process) if err != nil { t.Fatal(err) } waitProcess(&process, t) out := string(stdout.Bytes()) // fd 5 is the directory handle for /proc/$$/fd if out != "0 1 2 3 4 5" { t.Fatalf("expected to have the file descriptors '0 1 2 3 4 5' passed to init, got '%s'", out) } var buf = []byte{0} _, err = pipeout1.Read(buf) if err != nil { t.Fatal(err) } out1 := string(buf) if out1 != "1" { t.Fatalf("expected first pipe to receive '1', got '%s'", out1) } _, err = pipeout2.Read(buf) if err != nil { t.Fatal(err) } out2 := string(buf) if out2 != "2" { t.Fatalf("expected second pipe to receive '2', got '%s'", out2) } } func TestMountCmds(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) tmpDir, err := ioutil.TempDir("", "tmpdir") if err != nil { t.Fatal(err) } defer os.RemoveAll(tmpDir) config := newTemplateConfig(rootfs) config.Mounts = append(config.Mounts, &configs.Mount{ Source: tmpDir, Destination: "/tmp", Device: "bind", Flags: syscall.MS_BIND | syscall.MS_REC, 
PremountCmds: []configs.Command{ {Path: "touch", Args: []string{filepath.Join(tmpDir, "hello")}}, {Path: "touch", Args: []string{filepath.Join(tmpDir, "world")}}, }, PostmountCmds: []configs.Command{ {Path: "cp", Args: []string{filepath.Join(rootfs, "tmp", "hello"), filepath.Join(rootfs, "tmp", "hello-backup")}}, {Path: "cp", Args: []string{filepath.Join(rootfs, "tmp", "world"), filepath.Join(rootfs, "tmp", "world-backup")}}, }, }) container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } defer container.Destroy() pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "env"}, Env: standardEnvironment, } err = container.Run(&pconfig) if err != nil { t.Fatal(err) } // Wait for process waitProcess(&pconfig, t) entries, err := ioutil.ReadDir(tmpDir) if err != nil { t.Fatal(err) } expected := []string{"hello", "hello-backup", "world", "world-backup"} for i, e := range entries { if e.Name() != expected[i] { t.Errorf("Got(%s), expect %s", e.Name(), expected[i]) } } } func TestSysctl(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.Sysctl = map[string]string{ "kernel.shmmni": "8192", } container, err := factory.Create("test", config) ok(t, err) defer container.Destroy() var stdout bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "cat /proc/sys/kernel/shmmni"}, Env: standardEnvironment, Stdin: nil, Stdout: &stdout, } err = container.Run(&pconfig) ok(t, err) // Wait for process waitProcess(&pconfig, t) shmmniOutput := strings.TrimSpace(string(stdout.Bytes())) if shmmniOutput != "8192" { t.Fatalf("kernel.shmmni property expected to be 8192, but is %s", shmmniOutput) } } func TestMountCgroupRO(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.Mounts = 
append(config.Mounts, &configs.Mount{ Destination: "/sys/fs/cgroup", Device: "cgroup", Flags: defaultMountFlags | syscall.MS_RDONLY, }) buffers, exitCode, err := runContainer(config, "", "mount") if err != nil { t.Fatalf("%s: %s", buffers, err) } if exitCode != 0 { t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) } mountInfo := buffers.Stdout.String() lines := strings.Split(mountInfo, "\n") for _, l := range lines { if strings.HasPrefix(l, "tmpfs on /sys/fs/cgroup") { if !strings.Contains(l, "ro") || !strings.Contains(l, "nosuid") || !strings.Contains(l, "nodev") || !strings.Contains(l, "noexec") { t.Fatalf("Mode expected to contain 'ro,nosuid,nodev,noexec': %s", l) } if !strings.Contains(l, "mode=755") { t.Fatalf("Mode expected to contain 'mode=755': %s", l) } continue } if !strings.HasPrefix(l, "cgroup") { continue } if !strings.Contains(l, "ro") || !strings.Contains(l, "nosuid") || !strings.Contains(l, "nodev") || !strings.Contains(l, "noexec") { t.Fatalf("Mode expected to contain 'ro,nosuid,nodev,noexec': %s", l) } } } func TestMountCgroupRW(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.Mounts = append(config.Mounts, &configs.Mount{ Destination: "/sys/fs/cgroup", Device: "cgroup", Flags: defaultMountFlags, }) buffers, exitCode, err := runContainer(config, "", "mount") if err != nil { t.Fatalf("%s: %s", buffers, err) } if exitCode != 0 { t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) } mountInfo := buffers.Stdout.String() lines := strings.Split(mountInfo, "\n") for _, l := range lines { if strings.HasPrefix(l, "tmpfs on /sys/fs/cgroup") { if !strings.Contains(l, "rw") || !strings.Contains(l, "nosuid") || !strings.Contains(l, "nodev") || !strings.Contains(l, "noexec") { t.Fatalf("Mode expected to contain 'rw,nosuid,nodev,noexec': %s", l) } if !strings.Contains(l, "mode=755") { t.Fatalf("Mode expected to contain 'mode=755': %s", l) } continue } if !strings.HasPrefix(l, "cgroup") { continue } if !strings.Contains(l, "rw") || !strings.Contains(l, "nosuid") || !strings.Contains(l, "nodev") || !strings.Contains(l, "noexec") { t.Fatalf("Mode expected to contain 'rw,nosuid,nodev,noexec': %s", l) } } } func TestOomScoreAdj(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.OomScoreAdj = 200 factory, err := libcontainer.New(root, libcontainer.Cgroupfs) ok(t, err) container, err := factory.Create("test", config) ok(t, err) defer container.Destroy() var stdout bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "cat /proc/self/oom_score_adj"}, Env: standardEnvironment, Stdin: nil, Stdout: &stdout, } err = container.Run(&pconfig) ok(t, err) // Wait for process waitProcess(&pconfig, t) outputOomScoreAdj := strings.TrimSpace(string(stdout.Bytes())) // Check that the oom_score_adj matches the value that was set as part of config. 
if outputOomScoreAdj != strconv.Itoa(config.OomScoreAdj) { t.Fatalf("Expected oom_score_adj %d; got %q", config.OomScoreAdj, outputOomScoreAdj) } } func TestHook(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) expectedBundlePath := "/path/to/bundle/path" config.Labels = append(config.Labels, fmt.Sprintf("bundle=%s", expectedBundlePath)) config.Hooks = &configs.Hooks{ Prestart: []configs.Hook{ configs.NewFunctionHook(func(s configs.HookState) error { if s.BundlePath != expectedBundlePath { t.Fatalf("Expected prestart hook bundlePath '%s'; got '%s'", expectedBundlePath, s.BundlePath) } f, err := os.Create(filepath.Join(s.Root, "test")) if err != nil { return err } return f.Close() }), }, Poststart: []configs.Hook{ configs.NewFunctionHook(func(s configs.HookState) error { if s.BundlePath != expectedBundlePath { t.Fatalf("Expected poststart hook bundlePath '%s'; got '%s'", expectedBundlePath, s.BundlePath) } return ioutil.WriteFile(filepath.Join(s.Root, "test"), []byte("hello world"), 0755) }), }, Poststop: []configs.Hook{ configs.NewFunctionHook(func(s configs.HookState) error { if s.BundlePath != expectedBundlePath { t.Fatalf("Expected poststop hook bundlePath '%s'; got '%s'", expectedBundlePath, s.BundlePath) } return os.RemoveAll(filepath.Join(s.Root, "test")) }), }, } container, err := factory.Create("test", config) ok(t, err) var stdout bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "ls /test"}, Env: standardEnvironment, Stdin: nil, Stdout: &stdout, } err = container.Run(&pconfig) ok(t, err) // Wait for process waitProcess(&pconfig, t) outputLs := string(stdout.Bytes()) // Check that the ls output has the expected file touched by the prestart hook if !strings.Contains(outputLs, "/test") { container.Destroy() t.Fatalf("ls output doesn't have the expected file: %s", outputLs) 
} // Check that the file is written by the poststart hook testFilePath := filepath.Join(rootfs, "test") contents, err := ioutil.ReadFile(testFilePath) if err != nil { t.Fatalf("cannot read file '%s': %s", testFilePath, err) } if string(contents) != "hello world" { t.Fatalf("Expected test file to contain 'hello world'; got '%s'", string(contents)) } if err := container.Destroy(); err != nil { t.Fatalf("container destroy %s", err) } fi, err := os.Stat(filepath.Join(rootfs, "test")) if err == nil || !os.IsNotExist(err) { t.Fatalf("expected file to not exist, got %s", fi.Name()) } } func TestSTDIOPermissions(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) buffers, exitCode, err := runContainer(config, "", "sh", "-c", "echo hi > /dev/stderr") ok(t, err) if exitCode != 0 { t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) } if actual := strings.Trim(buffers.Stderr.String(), "\n"); actual != "hi" { t.Fatalf("stderr should equal be equal %q %q", actual, "hi") } } func unmountOp(path string) error { if err := syscall.Unmount(path, syscall.MNT_DETACH); err != nil { return err } return nil } // Launch container with rootfsPropagation in rslave mode. Also // bind mount a volume /mnt1host at /mnt1cont at the time of launch. Now do // another mount on host (/mnt1host/mnt2host) and this new mount should // propagate to container (/mnt1cont/mnt2host) func TestRootfsPropagationSlaveMount(t *testing.T) { var mountPropagated bool var dir1cont string var dir2cont string dir1cont = "/root/mnt1cont" if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.RootPropagation = syscall.MS_SLAVE | syscall.MS_REC // Bind mount a volume dir1host, err := ioutil.TempDir("", "mnt1host") ok(t, err) defer os.RemoveAll(dir1host) // Make this dir a "shared" mount point. 
This will make sure a // slave relationship can be established in container. err = syscall.Mount(dir1host, dir1host, "bind", syscall.MS_BIND|syscall.MS_REC, "") ok(t, err) err = syscall.Mount("", dir1host, "", syscall.MS_SHARED|syscall.MS_REC, "") ok(t, err) defer unmountOp(dir1host) config.Mounts = append(config.Mounts, &configs.Mount{ Source: dir1host, Destination: dir1cont, Device: "bind", Flags: syscall.MS_BIND | syscall.MS_REC}) // TODO: systemd specific processing f := factory container, err := f.Create("testSlaveMount", config) ok(t, err) defer container.Destroy() stdinR, stdinW, err := os.Pipe() ok(t, err) pconfig := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(pconfig) stdinR.Close() defer stdinW.Close() ok(t, err) // Create mnt1host/mnt2host and bind mount itself on top of it. This // should be visible in container. dir2host, err := ioutil.TempDir(dir1host, "mnt2host") ok(t, err) defer os.RemoveAll(dir2host) err = syscall.Mount(dir2host, dir2host, "bind", syscall.MS_BIND, "") defer unmountOp(dir2host) ok(t, err) // Run "cat /proc/self/mountinfo" in container and look at mount points. 
var stdout2 bytes.Buffer stdinR2, stdinW2, err := os.Pipe() ok(t, err) pconfig2 := &libcontainer.Process{ Cwd: "/", Args: []string{"cat", "/proc/self/mountinfo"}, Env: standardEnvironment, Stdin: stdinR2, Stdout: &stdout2, } err = container.Run(pconfig2) stdinR2.Close() defer stdinW2.Close() ok(t, err) stdinW2.Close() waitProcess(pconfig2, t) stdinW.Close() waitProcess(pconfig, t) mountPropagated = false dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host)) propagationInfo := string(stdout2.Bytes()) lines := strings.Split(propagationInfo, "\n") for _, l := range lines { linefields := strings.Split(l, " ") if len(linefields) < 5 { continue } if linefields[4] == dir2cont { mountPropagated = true break } } if mountPropagated != true { t.Fatalf("Mount on host %s did not propagate in container at %s\n", dir2host, dir2cont) } } // Launch container with rootfsPropagation 0 so no propagation flags are // applied. Also bind mount a volume /mnt1host at /mnt1cont at the time of // launch. Now do a mount in container (/mnt1cont/mnt2cont) and this new // mount should propagate to host (/mnt1host/mnt2cont) func TestRootfsPropagationSharedMount(t *testing.T) { var dir1cont string var dir2cont string dir1cont = "/root/mnt1cont" if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.RootPropagation = syscall.MS_PRIVATE // Bind mount a volume dir1host, err := ioutil.TempDir("", "mnt1host") ok(t, err) defer os.RemoveAll(dir1host) // Make this dir a "shared" mount point. This will make sure a // shared relationship can be established in container. 
err = syscall.Mount(dir1host, dir1host, "bind", syscall.MS_BIND|syscall.MS_REC, "") ok(t, err) err = syscall.Mount("", dir1host, "", syscall.MS_SHARED|syscall.MS_REC, "") ok(t, err) defer unmountOp(dir1host) config.Mounts = append(config.Mounts, &configs.Mount{ Source: dir1host, Destination: dir1cont, Device: "bind", Flags: syscall.MS_BIND | syscall.MS_REC}) // TODO: systemd specific processing f := factory container, err := f.Create("testSharedMount", config) ok(t, err) defer container.Destroy() stdinR, stdinW, err := os.Pipe() ok(t, err) pconfig := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(pconfig) stdinR.Close() defer stdinW.Close() ok(t, err) // Create mnt1host/mnt2cont. This will become visible inside container // at mnt1cont/mnt2cont. Bind mount itself on top of it. This // should be visible on host now. dir2host, err := ioutil.TempDir(dir1host, "mnt2cont") ok(t, err) defer os.RemoveAll(dir2host) dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host)) // Mount something in container and see if it is visible on host. var stdout2 bytes.Buffer stdinR2, stdinW2, err := os.Pipe() ok(t, err) // Provide CAP_SYS_ADMIN processCaps := append(config.Capabilities, "CAP_SYS_ADMIN") pconfig2 := &libcontainer.Process{ Cwd: "/", Args: []string{"mount", "--bind", dir2cont, dir2cont}, Env: standardEnvironment, Stdin: stdinR2, Stdout: &stdout2, Capabilities: processCaps, } err = container.Run(pconfig2) stdinR2.Close() defer stdinW2.Close() ok(t, err) // Wait for process stdinW2.Close() waitProcess(pconfig2, t) stdinW.Close() waitProcess(pconfig, t) defer unmountOp(dir2host) // Check if mount is visible on host or not. 
out, err := exec.Command("findmnt", "-n", "-f", "-oTARGET", dir2host).CombinedOutput() outtrim := strings.TrimSpace(string(out)) if err != nil { t.Logf("findmnt error %q: %q", err, outtrim) } if string(outtrim) != dir2host { t.Fatalf("Mount in container on %s did not propagate to host on %s. finmnt output=%s", dir2cont, dir2host, outtrim) } } func TestPIDHost(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/pid") ok(t, err) config := newTemplateConfig(rootfs) config.Namespaces.Remove(configs.NEWPID) buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/pid") ok(t, err) if exitCode != 0 { t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) } if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { t.Fatalf("ipc link not equal to host link %q %q", actual, l) } } func TestInitJoinPID(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) // Execute a long-running container container1, err := newContainer(newTemplateConfig(rootfs)) ok(t, err) defer container1.Destroy() stdinR1, stdinW1, err := os.Pipe() ok(t, err) init1 := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR1, } err = container1.Run(init1) stdinR1.Close() defer stdinW1.Close() ok(t, err) // get the state of the first container state1, err := container1.State() ok(t, err) pidns1 := state1.NamespacePaths[configs.NEWPID] // Run a container inside the existing pidns but with different cgroups config2 := newTemplateConfig(rootfs) config2.Namespaces.Add(configs.NEWPID, pidns1) config2.Cgroups.Path = "integration/test2" container2, err := newContainerWithName("testCT2", config2) ok(t, err) defer container2.Destroy() stdinR2, stdinW2, err := os.Pipe() ok(t, err) init2 := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR2, } 
err = container2.Run(init2) stdinR2.Close() defer stdinW2.Close() ok(t, err) // get the state of the second container state2, err := container2.State() ok(t, err) ns1, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", state1.InitProcessPid)) ok(t, err) ns2, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", state2.InitProcessPid)) ok(t, err) if ns1 != ns2 { t.Errorf("pidns(%s), wanted %s", ns2, ns1) } // check that namespaces are not the same if reflect.DeepEqual(state2.NamespacePaths, state1.NamespacePaths) { t.Errorf("Namespaces(%v), original %v", state2.NamespacePaths, state1.NamespacePaths) } // check that pidns is joined correctly. The initial container process list // should contain the second container's init process buffers := newStdBuffers() ps := &libcontainer.Process{ Cwd: "/", Args: []string{"ps"}, Env: standardEnvironment, Stdout: buffers.Stdout, } err = container1.Run(ps) ok(t, err) waitProcess(ps, t) // Stop init processes one by one. Stop the second container should // not stop the first. 
stdinW2.Close() waitProcess(init2, t) stdinW1.Close() waitProcess(init1, t) out := strings.TrimSpace(buffers.Stdout.String()) // output of ps inside the initial PID namespace should have // 1 line of header, // 2 lines of init processes, // 1 line of ps process if len(strings.Split(out, "\n")) != 4 { t.Errorf("unexpected running process, output %q", out) } } func TestInitJoinNetworkAndUser(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) // Execute a long-running container config1 := newTemplateConfig(rootfs) config1.UidMappings = []configs.IDMap{{0, 0, 1000}} config1.GidMappings = []configs.IDMap{{0, 0, 1000}} config1.Namespaces = append(config1.Namespaces, configs.Namespace{Type: configs.NEWUSER}) container1, err := newContainer(config1) ok(t, err) defer container1.Destroy() stdinR1, stdinW1, err := os.Pipe() ok(t, err) init1 := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR1, } err = container1.Run(init1) stdinR1.Close() defer stdinW1.Close() ok(t, err) // get the state of the first container state1, err := container1.State() ok(t, err) netns1 := state1.NamespacePaths[configs.NEWNET] userns1 := state1.NamespacePaths[configs.NEWUSER] // Run a container inside the existing pidns but with different cgroups rootfs2, err := newRootfs() ok(t, err) defer remove(rootfs2) config2 := newTemplateConfig(rootfs2) config2.UidMappings = []configs.IDMap{{0, 0, 1000}} config2.GidMappings = []configs.IDMap{{0, 0, 1000}} config2.Namespaces.Add(configs.NEWNET, netns1) config2.Namespaces.Add(configs.NEWUSER, userns1) config2.Cgroups.Path = "integration/test2" container2, err := newContainerWithName("testCT2", config2) ok(t, err) defer container2.Destroy() stdinR2, stdinW2, err := os.Pipe() ok(t, err) init2 := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: 
standardEnvironment, Stdin: stdinR2, } err = container2.Run(init2) stdinR2.Close() defer stdinW2.Close() ok(t, err) // get the state of the second container state2, err := container2.State() ok(t, err) for _, ns := range []string{"net", "user"} { ns1, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/%s", state1.InitProcessPid, ns)) ok(t, err) ns2, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/%s", state2.InitProcessPid, ns)) ok(t, err) if ns1 != ns2 { t.Errorf("%s(%s), wanted %s", ns, ns2, ns1) } } // check that namespaces are not the same if reflect.DeepEqual(state2.NamespacePaths, state1.NamespacePaths) { t.Errorf("Namespaces(%v), original %v", state2.NamespacePaths, state1.NamespacePaths) } // Stop init processes one by one. Stop the second container should // not stop the first. stdinW2.Close() waitProcess(init2, t) stdinW1.Close() waitProcess(init1, t) } func TestTmpfsCopyUp(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() ok(t, err) defer os.RemoveAll(root) rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.Mounts = append(config.Mounts, &configs.Mount{ Source: "tmpfs", Destination: "/etc", Device: "tmpfs", Extensions: configs.EXT_COPYUP, }) factory, err := libcontainer.New(root, libcontainer.Cgroupfs) ok(t, err) container, err := factory.Create("test", config) ok(t, err) defer container.Destroy() var stdout bytes.Buffer pconfig := libcontainer.Process{ Args: []string{"ls", "/etc/passwd"}, Env: standardEnvironment, Stdin: nil, Stdout: &stdout, } err = container.Run(&pconfig) ok(t, err) // Wait for process waitProcess(&pconfig, t) outputLs := string(stdout.Bytes()) // Check that the ls output has /etc/passwd if !strings.Contains(outputLs, "/etc/passwd") { t.Fatalf("/etc/passwd not copied up as expected: %v", outputLs) } } docker-runc-tags-docker-1.13.1/libcontainer/integration/execin_test.go000066400000000000000000000304521304443252500260550ustar00rootroot00000000000000package integration 
import ( "bytes" "fmt" "io" "os" "strconv" "strings" "syscall" "testing" "time" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" ) func TestExecIn(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) ok(t, err) defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() ok(t, err) process := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer stdinW.Close() ok(t, err) buffers := newStdBuffers() ps := &libcontainer.Process{ Cwd: "/", Args: []string{"ps"}, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Run(ps) ok(t, err) waitProcess(ps, t) stdinW.Close() waitProcess(process, t) out := buffers.Stdout.String() if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") { t.Fatalf("unexpected running process, output %q", out) } if strings.Contains(out, "\r") { t.Fatalf("unexpected carriage-return in output") } } func TestExecInUsernsRlimit(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } testExecInRlimit(t, true) } func TestExecInRlimit(t *testing.T) { testExecInRlimit(t, false) } func testExecInRlimit(t *testing.T, userns bool) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) if userns { config.UidMappings = []configs.IDMap{{0, 0, 1000}} config.GidMappings = []configs.IDMap{{0, 0, 1000}} config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) } container, err := newContainer(config) ok(t, err) defer container.Destroy() stdinR, stdinW, err := os.Pipe() ok(t, err) process := &libcontainer.Process{ Cwd: "/", Args: 
[]string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer stdinW.Close() ok(t, err) buffers := newStdBuffers() ps := &libcontainer.Process{ Cwd: "/", Args: []string{"/bin/sh", "-c", "ulimit -n"}, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, Rlimits: []configs.Rlimit{ // increase process rlimit higher than container rlimit to test per-process limit {Type: syscall.RLIMIT_NOFILE, Hard: 1026, Soft: 1026}, }, } err = container.Run(ps) ok(t, err) waitProcess(ps, t) stdinW.Close() waitProcess(process, t) out := buffers.Stdout.String() if limit := strings.TrimSpace(out); limit != "1026" { t.Fatalf("expected rlimit to be 1026, got %s", limit) } } func TestExecInAdditionalGroups(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) ok(t, err) defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() ok(t, err) process := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer stdinW.Close() ok(t, err) var stdout bytes.Buffer pconfig := libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "id", "-Gn"}, Env: standardEnvironment, Stdin: nil, Stdout: &stdout, AdditionalGroups: []string{"plugdev", "audio"}, } err = container.Run(&pconfig) ok(t, err) // Wait for process waitProcess(&pconfig, t) stdinW.Close() waitProcess(process, t) outputGroups := string(stdout.Bytes()) // Check that the groups output has the groups that we specified if !strings.Contains(outputGroups, "audio") { t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups) } if !strings.Contains(outputGroups, "plugdev") { t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups) } } func 
TestExecInError(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) ok(t, err) defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() ok(t, err) process := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer func() { stdinW.Close() if _, err := process.Wait(); err != nil { t.Log(err) } }() ok(t, err) for i := 0; i < 42; i++ { var out bytes.Buffer unexistent := &libcontainer.Process{ Cwd: "/", Args: []string{"unexistent"}, Env: standardEnvironment, Stdout: &out, } err = container.Run(unexistent) if err == nil { t.Fatal("Should be an error") } if !strings.Contains(err.Error(), "executable file not found") { t.Fatalf("Should be error about not found executable, got %s", err) } if !bytes.Contains(out.Bytes(), []byte("executable file not found")) { t.Fatalf("executable file not found error not delivered to stdio:\n%s", out.String()) } } } func TestExecInTTY(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) ok(t, err) defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() ok(t, err) process := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer stdinW.Close() ok(t, err) var stdout bytes.Buffer ps := &libcontainer.Process{ Cwd: "/", Args: []string{"ps"}, Env: standardEnvironment, } console, err := ps.NewConsole(0, 0) copy := make(chan struct{}) go func() { io.Copy(&stdout, console) close(copy) }() ok(t, err) err = container.Run(ps) ok(t, err) select { case <-time.After(5 * time.Second): t.Fatal("Waiting for copy timed out") case 
<-copy: } waitProcess(ps, t) stdinW.Close() waitProcess(process, t) out := stdout.String() if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") { t.Fatalf("unexpected running process, output %q", out) } if strings.Contains(out, "\r") { t.Fatalf("unexpected carriage-return in output") } } func TestExecInEnvironment(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) ok(t, err) defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() ok(t, err) process := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer stdinW.Close() ok(t, err) buffers := newStdBuffers() process2 := &libcontainer.Process{ Cwd: "/", Args: []string{"env"}, Env: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "DEBUG=true", "DEBUG=false", "ENV=test", }, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Run(process2) ok(t, err) waitProcess(process2, t) stdinW.Close() waitProcess(process, t) out := buffers.Stdout.String() // check execin's process environment if !strings.Contains(out, "DEBUG=false") || !strings.Contains(out, "ENV=test") || !strings.Contains(out, "HOME=/root") || !strings.Contains(out, "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin") || strings.Contains(out, "DEBUG=true") { t.Fatalf("unexpected running process, output %q", out) } } func TestExecinPassExtraFiles(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } 
process := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer stdinW.Close() if err != nil { t.Fatal(err) } var stdout bytes.Buffer pipeout1, pipein1, err := os.Pipe() pipeout2, pipein2, err := os.Pipe() inprocess := &libcontainer.Process{ Cwd: "/", Args: []string{"sh", "-c", "cd /proc/$$/fd; echo -n *; echo -n 1 >3; echo -n 2 >4"}, Env: []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}, ExtraFiles: []*os.File{pipein1, pipein2}, Stdin: nil, Stdout: &stdout, } err = container.Run(inprocess) if err != nil { t.Fatal(err) } waitProcess(inprocess, t) stdinW.Close() waitProcess(process, t) out := string(stdout.Bytes()) // fd 5 is the directory handle for /proc/$$/fd if out != "0 1 2 3 4 5" { t.Fatalf("expected to have the file descriptors '0 1 2 3 4 5' passed to exec, got '%s'", out) } var buf = []byte{0} _, err = pipeout1.Read(buf) if err != nil { t.Fatal(err) } out1 := string(buf) if out1 != "1" { t.Fatalf("expected first pipe to receive '1', got '%s'", out1) } _, err = pipeout2.Read(buf) if err != nil { t.Fatal(err) } out2 := string(buf) if out2 != "2" { t.Fatalf("expected second pipe to receive '2', got '%s'", out2) } } func TestExecInOomScoreAdj(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.OomScoreAdj = 200 container, err := newContainer(config) ok(t, err) defer container.Destroy() stdinR, stdinW, err := os.Pipe() ok(t, err) process := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer stdinW.Close() ok(t, err) buffers := newStdBuffers() ps := &libcontainer.Process{ Cwd: "/", Args: []string{"/bin/sh", "-c", "cat /proc/self/oom_score_adj"}, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = 
container.Run(ps) ok(t, err) waitProcess(ps, t) stdinW.Close() waitProcess(process, t) out := buffers.Stdout.String() if oomScoreAdj := strings.TrimSpace(out); oomScoreAdj != strconv.Itoa(config.OomScoreAdj) { t.Fatalf("expected oomScoreAdj to be %d, got %s", config.OomScoreAdj, oomScoreAdj) } } func TestExecInUserns(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } if testing.Short() { return } rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) config := newTemplateConfig(rootfs) config.UidMappings = []configs.IDMap{{0, 0, 1000}} config.GidMappings = []configs.IDMap{{0, 0, 1000}} config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) container, err := newContainer(config) ok(t, err) defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() ok(t, err) process := &libcontainer.Process{ Cwd: "/", Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Run(process) stdinR.Close() defer stdinW.Close() ok(t, err) initPID, err := process.Pid() ok(t, err) initUserns, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/user", initPID)) ok(t, err) buffers := newStdBuffers() process2 := &libcontainer.Process{ Cwd: "/", Args: []string{"readlink", "/proc/self/ns/user"}, Env: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", }, Stdout: buffers.Stdout, Stderr: os.Stderr, } err = container.Run(process2) ok(t, err) waitProcess(process2, t) stdinW.Close() waitProcess(process, t) if out := strings.TrimSpace(buffers.Stdout.String()); out != initUserns { t.Errorf("execin userns(%s), wanted %s", out, initUserns) } } docker-runc-tags-docker-1.13.1/libcontainer/integration/init_test.go000066400000000000000000000023721304443252500255450ustar00rootroot00000000000000package integration import ( "os" "runtime" "testing" "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer" 
"github.com/opencontainers/runc/libcontainer/cgroups/systemd" _ "github.com/opencontainers/runc/libcontainer/nsenter" ) // init runs the libcontainer initialization code because of the busybox style needs // to work around the go runtime and the issues with forking func init() { if len(os.Args) < 2 || os.Args[1] != "init" { return } runtime.GOMAXPROCS(1) runtime.LockOSThread() factory, err := libcontainer.New("") if err != nil { logrus.Fatalf("unable to initialize for container: %s", err) } if err := factory.StartInitialization(); err != nil { logrus.Fatal(err) } } var ( factory libcontainer.Factory systemdFactory libcontainer.Factory ) func TestMain(m *testing.M) { var ( err error ret int ) logrus.SetOutput(os.Stderr) logrus.SetLevel(logrus.InfoLevel) factory, err = libcontainer.New("/run/libctTests", libcontainer.Cgroupfs) if err != nil { logrus.Error(err) os.Exit(1) } if systemd.UseSystemd() { systemdFactory, err = libcontainer.New("/run/libctTests", libcontainer.SystemdCgroups) if err != nil { logrus.Error(err) os.Exit(1) } } ret = m.Run() os.Exit(ret) } docker-runc-tags-docker-1.13.1/libcontainer/integration/seccomp_test.go000066400000000000000000000105651304443252500262360ustar00rootroot00000000000000// +build linux,cgo,seccomp package integration import ( "strings" "syscall" "testing" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" libseccomp "github.com/seccomp/libseccomp-golang" ) func TestSeccompDenyGetcwd(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) config.Seccomp = &configs.Seccomp{ DefaultAction: configs.Allow, Syscalls: []*configs.Syscall{ { Name: "getcwd", Action: configs.Errno, }, }, } container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() buffers := newStdBuffers() pwd := &libcontainer.Process{ Cwd: "/", Args: []string{"pwd"}, Env: 
standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Run(pwd) if err != nil { t.Fatal(err) } ps, err := pwd.Wait() if err == nil { t.Fatal("Expecting error (negative return code); instead exited cleanly!") } var exitCode int status := ps.Sys().(syscall.WaitStatus) if status.Exited() { exitCode = status.ExitStatus() } else if status.Signaled() { exitCode = -int(status.Signal()) } else { t.Fatalf("Unrecognized exit reason!") } if exitCode == 0 { t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode) } expected := "pwd: getcwd: Operation not permitted" actual := strings.Trim(buffers.Stderr.String(), "\n") if actual != expected { t.Fatalf("Expected output %s but got %s\n", expected, actual) } } func TestSeccompPermitWriteConditional(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) config.Seccomp = &configs.Seccomp{ DefaultAction: configs.Allow, Syscalls: []*configs.Syscall{ { Name: "write", Action: configs.Errno, Args: []*configs.Arg{ { Index: 0, Value: 2, Op: configs.EqualTo, }, }, }, }, } container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() buffers := newStdBuffers() dmesg := &libcontainer.Process{ Cwd: "/", Args: []string{"busybox", "ls", "/"}, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Run(dmesg) if err != nil { t.Fatal(err) } if _, err := dmesg.Wait(); err != nil { t.Fatalf("%s: %s", err, buffers.Stderr) } } func TestSeccompDenyWriteConditional(t *testing.T) { if testing.Short() { return } // Only test if library version is v2.2.1 or higher // Conditional filtering will always error in v2.2.0 and lower major, minor, micro := libseccomp.GetLibraryVersion() if (major == 2 && minor < 2) || (major == 2 && minor == 2 && micro < 1) { return } rootfs, err := 
newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) config.Seccomp = &configs.Seccomp{ DefaultAction: configs.Allow, Syscalls: []*configs.Syscall{ { Name: "write", Action: configs.Errno, Args: []*configs.Arg{ { Index: 0, Value: 2, Op: configs.EqualTo, }, }, }, }, } container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() buffers := newStdBuffers() dmesg := &libcontainer.Process{ Cwd: "/", Args: []string{"busybox", "ls", "does_not_exist"}, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Run(dmesg) if err != nil { t.Fatal(err) } ps, err := dmesg.Wait() if err == nil { t.Fatal("Expecting negative return, instead got 0!") } var exitCode int status := ps.Sys().(syscall.WaitStatus) if status.Exited() { exitCode = status.ExitStatus() } else if status.Signaled() { exitCode = -int(status.Signal()) } else { t.Fatalf("Unrecognized exit reason!") } if exitCode == 0 { t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode) } // We're denying write to stderr, so we expect an empty buffer expected := "" actual := strings.Trim(buffers.Stderr.String(), "\n") if actual != expected { t.Fatalf("Expected output %s but got %s\n", expected, actual) } } docker-runc-tags-docker-1.13.1/libcontainer/integration/template_test.go000066400000000000000000000056031304443252500264150ustar00rootroot00000000000000package integration import ( "syscall" "github.com/opencontainers/runc/libcontainer/configs" ) var standardEnvironment = []string{ "HOME=/root", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "HOSTNAME=integration", "TERM=xterm", } const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV // newTemplateConfig returns a base template for running a container // // it uses a network strategy of just setting a loopback interface // and the default setup for devices func 
newTemplateConfig(rootfs string) *configs.Config { allowAllDevices := false return &configs.Config{ Rootfs: rootfs, Capabilities: []string{ "CAP_CHOWN", "CAP_DAC_OVERRIDE", "CAP_FSETID", "CAP_FOWNER", "CAP_MKNOD", "CAP_NET_RAW", "CAP_SETGID", "CAP_SETUID", "CAP_SETFCAP", "CAP_SETPCAP", "CAP_NET_BIND_SERVICE", "CAP_SYS_CHROOT", "CAP_KILL", "CAP_AUDIT_WRITE", }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: configs.NEWNS}, {Type: configs.NEWUTS}, {Type: configs.NEWIPC}, {Type: configs.NEWPID}, {Type: configs.NEWNET}, }), Cgroups: &configs.Cgroup{ Path: "integration/test", Resources: &configs.Resources{ MemorySwappiness: nil, AllowAllDevices: &allowAllDevices, AllowedDevices: configs.DefaultAllowedDevices, }, }, MaskPaths: []string{ "/proc/kcore", "/sys/firmware", }, ReadonlyPaths: []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", }, Devices: configs.DefaultAutoCreatedDevices, Hostname: "integration", Mounts: []*configs.Mount{ { Source: "proc", Destination: "/proc", Device: "proc", Flags: defaultMountFlags, }, { Source: "tmpfs", Destination: "/dev", Device: "tmpfs", Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, Data: "mode=755", }, { Source: "devpts", Destination: "/dev/pts", Device: "devpts", Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", }, { Device: "tmpfs", Source: "shm", Destination: "/dev/shm", Data: "mode=1777,size=65536k", Flags: defaultMountFlags, }, /* CI is broken on the debian based kernels with this { Source: "mqueue", Destination: "/dev/mqueue", Device: "mqueue", Flags: defaultMountFlags, }, */ { Source: "sysfs", Destination: "/sys", Device: "sysfs", Flags: defaultMountFlags | syscall.MS_RDONLY, }, }, Networks: []*configs.Network{ { Type: "loopback", Address: "127.0.0.1/0", Gateway: "localhost", }, }, Rlimits: []configs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, Hard: uint64(1025), Soft: uint64(1025), }, }, } } 
docker-runc-tags-docker-1.13.1/libcontainer/integration/utils_test.go000066400000000000000000000071721304443252500257450ustar00rootroot00000000000000package integration import ( "bytes" "crypto/md5" "encoding/hex" "fmt" "io/ioutil" "os" "os/exec" "path/filepath" "runtime" "strings" "syscall" "testing" "time" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" ) func newStdBuffers() *stdBuffers { return &stdBuffers{ Stdin: bytes.NewBuffer(nil), Stdout: bytes.NewBuffer(nil), Stderr: bytes.NewBuffer(nil), } } type stdBuffers struct { Stdin *bytes.Buffer Stdout *bytes.Buffer Stderr *bytes.Buffer } func (b *stdBuffers) String() string { s := []string{} if b.Stderr != nil { s = append(s, b.Stderr.String()) } if b.Stdout != nil { s = append(s, b.Stdout.String()) } return strings.Join(s, "|") } // ok fails the test if an err is not nil. func ok(t testing.TB, err error) { if err != nil { _, file, line, _ := runtime.Caller(1) t.Fatalf("%s:%d: unexpected error: %s\n\n", filepath.Base(file), line, err.Error()) } } func waitProcess(p *libcontainer.Process, t *testing.T) { _, file, line, _ := runtime.Caller(1) status, err := p.Wait() if err != nil { t.Fatalf("%s:%d: unexpected error: %s\n\n", filepath.Base(file), line, err.Error()) } if !status.Success() { t.Fatalf("%s:%d: unexpected status: %s\n\n", filepath.Base(file), line, status.String()) } } func newTestRoot() (string, error) { dir, err := ioutil.TempDir("", "libcontainer") if err != nil { return "", err } if err := os.MkdirAll(dir, 0700); err != nil { return "", err } return dir, nil } // newRootfs creates a new tmp directory and copies the busybox root filesystem func newRootfs() (string, error) { dir, err := ioutil.TempDir("", "") if err != nil { return "", err } if err := os.MkdirAll(dir, 0700); err != nil { return "", err } if err := copyBusybox(dir); err != nil { return "", err } return dir, nil } func remove(dir string) { os.RemoveAll(dir) } // copyBusybox copies the 
rootfs for a busybox container created for the test image // into the new directory for the specific test func copyBusybox(dest string) error { out, err := exec.Command("sh", "-c", fmt.Sprintf("cp -R /busybox/* %s/", dest)).CombinedOutput() if err != nil { return fmt.Errorf("copy error %q: %q", err, out) } return nil } func newContainer(config *configs.Config) (libcontainer.Container, error) { h := md5.New() h.Write([]byte(time.Now().String())) return newContainerWithName(hex.EncodeToString(h.Sum(nil)), config) } func newContainerWithName(name string, config *configs.Config) (libcontainer.Container, error) { f := factory if config.Cgroups != nil && config.Cgroups.Parent == "system.slice" { f = systemdFactory } return f.Create(name, config) } // runContainer runs the container with the specific config and arguments // // buffers are returned containing the STDOUT and STDERR output for the run // along with the exit code and any go error func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) { container, err := newContainer(config) if err != nil { return nil, -1, err } defer container.Destroy() buffers = newStdBuffers() process := &libcontainer.Process{ Cwd: "/", Args: args, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Run(process) if err != nil { return buffers, -1, err } ps, err := process.Wait() if err != nil { return buffers, -1, err } status := ps.Sys().(syscall.WaitStatus) if status.Exited() { exitCode = status.ExitStatus() } else if status.Signaled() { exitCode = -int(status.Signal()) } else { return buffers, -1, err } return } docker-runc-tags-docker-1.13.1/libcontainer/keys/000077500000000000000000000000001304443252500216405ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/keys/keyctl.go000066400000000000000000000031041304443252500234600ustar00rootroot00000000000000// +build linux package keys import ( 
"fmt" "strconv" "strings" "syscall" "unsafe" ) const KEYCTL_JOIN_SESSION_KEYRING = 1 const KEYCTL_SETPERM = 5 const KEYCTL_DESCRIBE = 6 type KeySerial uint32 func JoinSessionKeyring(name string) (KeySerial, error) { var _name *byte var err error if len(name) > 0 { _name, err = syscall.BytePtrFromString(name) if err != nil { return KeySerial(0), err } } sessKeyId, _, errn := syscall.Syscall(syscall.SYS_KEYCTL, KEYCTL_JOIN_SESSION_KEYRING, uintptr(unsafe.Pointer(_name)), 0) if errn != 0 { return 0, fmt.Errorf("could not create session key: %v", errn) } return KeySerial(sessKeyId), nil } // ModKeyringPerm modifies permissions on a keyring by reading the current permissions, // anding the bits with the given mask (clearing permissions) and setting // additional permission bits func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error { dest := make([]byte, 1024) destBytes := unsafe.Pointer(&dest[0]) if _, _, err := syscall.Syscall6(syscall.SYS_KEYCTL, uintptr(KEYCTL_DESCRIBE), uintptr(ringId), uintptr(destBytes), uintptr(len(dest)), 0, 0); err != 0 { return err } res := strings.Split(string(dest), ";") if len(res) < 5 { return fmt.Errorf("Destination buffer for key description is too small") } // parse permissions perm64, err := strconv.ParseUint(res[3], 16, 32) if err != nil { return err } perm := (uint32(perm64) & mask) | setbits if _, _, err := syscall.Syscall(syscall.SYS_KEYCTL, uintptr(KEYCTL_SETPERM), uintptr(ringId), uintptr(perm)); err != 0 { return err } return nil } docker-runc-tags-docker-1.13.1/libcontainer/label/000077500000000000000000000000001304443252500217445ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/label/label.go000066400000000000000000000033051304443252500233530ustar00rootroot00000000000000// +build !selinux !linux package label // InitLabels returns the process label and file labels to be used within // the container. A list of options can be passed into this function to alter // the labels. 
func InitLabels(options []string) (string, string, error) { return "", "", nil } func GetROMountLabel() string { return "" } func GenLabels(options string) (string, string, error) { return "", "", nil } func FormatMountLabel(src string, mountLabel string) string { return src } func SetProcessLabel(processLabel string) error { return nil } func GetFileLabel(path string) (string, error) { return "", nil } func SetFileLabel(path string, fileLabel string) error { return nil } func SetFileCreateLabel(fileLabel string) error { return nil } func Relabel(path string, fileLabel string, shared bool) error { return nil } func GetPidLabel(pid int) (string, error) { return "", nil } func Init() { } func ReserveLabel(label string) error { return nil } func UnreserveLabel(label string) error { return nil } // DupSecOpt takes a process label and returns security options that // can be used to set duplicate labels on future container processes func DupSecOpt(src string) []string { return nil } // DisableSecOpt returns a security opt that can disable labeling // support for future container processes func DisableSecOpt() []string { return nil } // Validate checks that the label does not include unexpected options func Validate(label string) error { return nil } // RelabelNeeded checks whether the user requested a relabel func RelabelNeeded(label string) bool { return false } // IsShared checks that the label includes a "shared" mark func IsShared(label string) bool { return false } docker-runc-tags-docker-1.13.1/libcontainer/label/label_selinux.go000066400000000000000000000137041304443252500251260ustar00rootroot00000000000000// +build selinux,linux package label import ( "fmt" "strings" "github.com/opencontainers/runc/libcontainer/selinux" ) // Valid Label Options var validOptions = map[string]bool{ "disable": true, "type": true, "user": true, "role": true, "level": true, } var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together") // InitLabels 
returns the process label and file labels to be used within // the container. A list of options can be passed into this function to alter // the labels. The labels returned will include a random MCS String, that is // guaranteed to be unique. func InitLabels(options []string) (string, string, error) { if !selinux.SelinuxEnabled() { return "", "", nil } processLabel, mountLabel := selinux.GetLxcContexts() if processLabel != "" { pcon := selinux.NewContext(processLabel) mcon := selinux.NewContext(mountLabel) for _, opt := range options { if opt == "disable" { return "", "", nil } if i := strings.Index(opt, ":"); i == -1 { return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type' followed by ':' and a value", opt) } con := strings.SplitN(opt, ":", 2) if !validOptions[con[0]] { return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type'", con[0]) } pcon[con[0]] = con[1] if con[0] == "level" || con[0] == "user" { mcon[con[0]] = con[1] } } processLabel = pcon.Get() mountLabel = mcon.Get() } return processLabel, mountLabel, nil } func GetROMountLabel() string { return selinux.GetROFileLabel() } // DEPRECATED: The GenLabels function is only to be used during the transition to the official API. func GenLabels(options string) (string, string, error) { return InitLabels(strings.Fields(options)) } // FormatMountLabel returns a string to be used by the mount command. // The format of this string will be used to alter the labeling of the mountpoint. // The string returned is suitable to be used as the options field of the mount command. // If you need to have additional mount point options, you can pass them in as // the first parameter. Second parameter is the label that you wish to apply // to all content in the mount point. 
func FormatMountLabel(src, mountLabel string) string { if mountLabel != "" { switch src { case "": src = fmt.Sprintf("context=%q", mountLabel) default: src = fmt.Sprintf("%s,context=%q", src, mountLabel) } } return src } // SetProcessLabel takes a process label and tells the kernel to assign the // label to the next program executed by the current process. func SetProcessLabel(processLabel string) error { if processLabel == "" { return nil } return selinux.Setexeccon(processLabel) } // GetProcessLabel returns the process label that the kernel will assign // to the next program executed by the current process. If "" is returned // this indicates that the default labeling will happen for the process. func GetProcessLabel() (string, error) { return selinux.Getexeccon() } // GetFileLabel returns the label for specified path func GetFileLabel(path string) (string, error) { return selinux.Getfilecon(path) } // SetFileLabel modifies the "path" label to the specified file label func SetFileLabel(path string, fileLabel string) error { if selinux.SelinuxEnabled() && fileLabel != "" { return selinux.Setfilecon(path, fileLabel) } return nil } // SetFileCreateLabel tells the kernel the label for all files to be created func SetFileCreateLabel(fileLabel string) error { if selinux.SelinuxEnabled() { return selinux.Setfscreatecon(fileLabel) } return nil } // Relabel changes the label of path to the filelabel string. // It changes the MCS label to s0 if shared is true. // This will allow all containers to share the content. 
func Relabel(path string, fileLabel string, shared bool) error { if !selinux.SelinuxEnabled() { return nil } if fileLabel == "" { return nil } exclude_paths := map[string]bool{"/": true, "/usr": true, "/etc": true} if exclude_paths[path] { return fmt.Errorf("SELinux relabeling of %s is not allowed", path) } if shared { c := selinux.NewContext(fileLabel) c["level"] = "s0" fileLabel = c.Get() } if err := selinux.Chcon(path, fileLabel, true); err != nil { return fmt.Errorf("SELinux relabeling of %s is not allowed: %q", path, err) } return nil } // GetPidLabel will return the label of the process running with the specified pid func GetPidLabel(pid int) (string, error) { return selinux.Getpidcon(pid) } // Init initialises the labeling system func Init() { selinux.SelinuxEnabled() } // ReserveLabel will record the fact that the MCS label has already been used. // This will prevent InitLabels from using the MCS label in a newly created // container func ReserveLabel(label string) error { selinux.ReserveLabel(label) return nil } // UnreserveLabel will remove the reservation of the MCS label. 
// This will allow InitLabels to use the MCS label in a newly created // containers func UnreserveLabel(label string) error { selinux.FreeLxcContexts(label) return nil } // DupSecOpt takes an process label and returns security options that // can be used to set duplicate labels on future container processes func DupSecOpt(src string) []string { return selinux.DupSecOpt(src) } // DisableSecOpt returns a security opt that can disable labeling // support for future container processes func DisableSecOpt() []string { return selinux.DisableSecOpt() } // Validate checks that the label does not include unexpected options func Validate(label string) error { if strings.Contains(label, "z") && strings.Contains(label, "Z") { return ErrIncompatibleLabel } return nil } // RelabelNeeded checks whether the user requested a relabel func RelabelNeeded(label string) bool { return strings.Contains(label, "z") || strings.Contains(label, "Z") } // IsShared checks that the label includes a "shared" mark func IsShared(label string) bool { return strings.Contains(label, "z") } docker-runc-tags-docker-1.13.1/libcontainer/label/label_selinux_test.go000066400000000000000000000073071304443252500261670ustar00rootroot00000000000000// +build selinux,linux package label import ( "os" "strings" "testing" "github.com/opencontainers/runc/libcontainer/selinux" ) func TestInit(t *testing.T) { if selinux.SelinuxEnabled() { var testNull []string plabel, mlabel, err := InitLabels(testNull) if err != nil { t.Log("InitLabels Failed") t.Fatal(err) } testDisabled := []string{"disable"} roMountLabel := GetROMountLabel() if roMountLabel == "" { t.Errorf("GetROMountLabel Failed") } plabel, mlabel, err = InitLabels(testDisabled) if err != nil { t.Log("InitLabels Disabled Failed") t.Fatal(err) } if plabel != "" { t.Log("InitLabels Disabled Failed") t.FailNow() } testUser := []string{"user:user_u", "role:user_r", "type:user_t", "level:s0:c1,c15"} plabel, mlabel, err = InitLabels(testUser) if err != nil { 
t.Log("InitLabels User Failed") t.Fatal(err) } if plabel != "user_u:user_r:user_t:s0:c1,c15" || mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15" { t.Log("InitLabels User Match Failed") t.Log(plabel, mlabel) t.Fatal(err) } testBadData := []string{"user", "role:user_r", "type:user_t", "level:s0:c1,c15"} if _, _, err = InitLabels(testBadData); err == nil { t.Log("InitLabels Bad Failed") t.Fatal(err) } } } func TestDuplicateLabel(t *testing.T) { secopt := DupSecOpt("system_u:system_r:svirt_lxc_net_t:s0:c1,c2") t.Log(secopt) for _, opt := range secopt { parts := strings.SplitN(opt, "=", 2) if len(parts) != 2 || parts[0] != "label" { t.Errorf("Invalid DupSecOpt return value") continue } con := strings.SplitN(parts[1], ":", 2) if con[0] == "user" { if con[1] != "system_u" { t.Errorf("DupSecOpt Failed user incorrect") } continue } if con[0] == "role" { if con[1] != "system_r" { t.Errorf("DupSecOpt Failed role incorrect") } continue } if con[0] == "type" { if con[1] != "svirt_lxc_net_t" { t.Errorf("DupSecOpt Failed type incorrect") } continue } if con[0] == "level" { if con[1] != "s0:c1,c2" { t.Errorf("DupSecOpt Failed level incorrect") } continue } t.Errorf("DupSecOpt Failed invalid field %q", con[0]) } secopt = DisableSecOpt() if secopt[0] != "label=disable" { t.Errorf("DisableSecOpt Failed level incorrect") } } func TestRelabel(t *testing.T) { testdir := "/tmp/test" if err := os.Mkdir(testdir, 0755); err != nil { t.Fatal(err) } defer os.RemoveAll(testdir) label := "system_u:object_r:svirt_sandbox_file_t:s0:c1,c2" if err := Relabel(testdir, "", true); err != nil { t.Fatalf("Relabel with no label failed: %v", err) } if err := Relabel(testdir, label, true); err != nil { t.Fatalf("Relabel shared failed: %v", err) } if err := Relabel(testdir, label, false); err != nil { t.Fatalf("Relabel unshared failed: %v", err) } if err := Relabel("/etc", label, false); err == nil { t.Fatalf("Relabel /etc succeeded") } if err := Relabel("/", label, false); err == nil { 
t.Fatalf("Relabel / succeeded") } if err := Relabel("/usr", label, false); err == nil { t.Fatalf("Relabel /usr succeeded") } } func TestValidate(t *testing.T) { if err := Validate("zZ"); err != ErrIncompatibleLabel { t.Fatalf("Expected incompatible error, got %v", err) } if err := Validate("Z"); err != nil { t.Fatal(err) } if err := Validate("z"); err != nil { t.Fatal(err) } if err := Validate(""); err != nil { t.Fatal(err) } } func TestIsShared(t *testing.T) { if shared := IsShared("Z"); shared { t.Fatalf("Expected label `Z` to not be shared, got %v", shared) } if shared := IsShared("z"); !shared { t.Fatalf("Expected label `z` to be shared, got %v", shared) } if shared := IsShared("Zz"); !shared { t.Fatalf("Expected label `Zz` to be shared, got %v", shared) } } docker-runc-tags-docker-1.13.1/libcontainer/message_linux.go000066400000000000000000000040111304443252500240530ustar00rootroot00000000000000// +build linux package libcontainer import ( "syscall" "github.com/vishvananda/netlink/nl" ) // list of known message types we want to send to bootstrap program // The number is randomly chosen to not conflict with known netlink types const ( InitMsg uint16 = 62000 CloneFlagsAttr uint16 = 27281 ConsolePathAttr uint16 = 27282 NsPathsAttr uint16 = 27283 UidmapAttr uint16 = 27284 GidmapAttr uint16 = 27285 SetgroupAttr uint16 = 27286 // When syscall.NLA_HDRLEN is in gccgo, take this out. syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1) ) type Int32msg struct { Type uint16 Value uint32 } // Serialize serializes the message. 
// Int32msg has the following representation // | nlattr len | nlattr type | // | uint32 value | func (msg *Int32msg) Serialize() []byte { buf := make([]byte, msg.Len()) native := nl.NativeEndian() native.PutUint16(buf[0:2], uint16(msg.Len())) native.PutUint16(buf[2:4], msg.Type) native.PutUint32(buf[4:8], msg.Value) return buf } func (msg *Int32msg) Len() int { return syscall_NLA_HDRLEN + 4 } // Bytemsg has the following representation // | nlattr len | nlattr type | // | value | pad | type Bytemsg struct { Type uint16 Value []byte } func (msg *Bytemsg) Serialize() []byte { l := msg.Len() buf := make([]byte, (l+syscall.NLA_ALIGNTO-1) & ^(syscall.NLA_ALIGNTO-1)) native := nl.NativeEndian() native.PutUint16(buf[0:2], uint16(l)) native.PutUint16(buf[2:4], msg.Type) copy(buf[4:], msg.Value) return buf } func (msg *Bytemsg) Len() int { return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated } type Boolmsg struct { Type uint16 Value bool } func (msg *Boolmsg) Serialize() []byte { buf := make([]byte, msg.Len()) native := nl.NativeEndian() native.PutUint16(buf[0:2], uint16(msg.Len())) native.PutUint16(buf[2:4], msg.Type) if msg.Value { buf[4] = 1 } else { buf[4] = 0 } return buf } func (msg *Boolmsg) Len() int { return syscall_NLA_HDRLEN + 1 } docker-runc-tags-docker-1.13.1/libcontainer/network_linux.go000066400000000000000000000146701304443252500241340ustar00rootroot00000000000000// +build linux package libcontainer import ( "fmt" "io/ioutil" "net" "path/filepath" "strconv" "strings" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/utils" "github.com/vishvananda/netlink" ) var strategies = map[string]networkStrategy{ "veth": &veth{}, "loopback": &loopback{}, } // networkStrategy represents a specific network configuration for // a container's networking stack type networkStrategy interface { create(*network, int) error initialize(*network) error detach(*configs.Network) error attach(*configs.Network) error } // 
getStrategy returns the specific network strategy for the // provided type. func getStrategy(tpe string) (networkStrategy, error) { s, exists := strategies[tpe] if !exists { return nil, fmt.Errorf("unknown strategy type %q", tpe) } return s, nil } // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) { out := &NetworkInterface{Name: interfaceName} // This can happen if the network runtime information is missing - possible if the // container was created by an old version of libcontainer. if interfaceName == "" { return out, nil } type netStatsPair struct { // Where to write the output. Out *uint64 // The network stats file to read. File string } // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. netStats := []netStatsPair{ {Out: &out.RxBytes, File: "tx_bytes"}, {Out: &out.RxPackets, File: "tx_packets"}, {Out: &out.RxErrors, File: "tx_errors"}, {Out: &out.RxDropped, File: "tx_dropped"}, {Out: &out.TxBytes, File: "rx_bytes"}, {Out: &out.TxPackets, File: "rx_packets"}, {Out: &out.TxErrors, File: "rx_errors"}, {Out: &out.TxDropped, File: "rx_dropped"}, } for _, netStat := range netStats { data, err := readSysfsNetworkStats(interfaceName, netStat.File) if err != nil { return nil, err } *(netStat.Out) = data } return out, nil } // Reads the specified statistics available under /sys/class/net//statistics func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)) if err != nil { return 0, err } return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) } // loopback is a network strategy that provides a basic loopback device type loopback struct { } func (l *loopback) create(n *network, nspid int) error { return nil } func (l *loopback) 
initialize(config *network) error { return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}}) } func (l *loopback) attach(n *configs.Network) (err error) { return nil } func (l *loopback) detach(n *configs.Network) (err error) { return nil } // veth is a network strategy that uses a bridge and creates // a veth pair, one that is attached to the bridge on the host and the other // is placed inside the container's namespace type veth struct { } func (v *veth) detach(n *configs.Network) (err error) { return netlink.LinkSetMaster(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil) } // attach a container network interface to an external network func (v *veth) attach(n *configs.Network) (err error) { brl, err := netlink.LinkByName(n.Bridge) if err != nil { return err } br, ok := brl.(*netlink.Bridge) if !ok { return fmt.Errorf("Wrong device type %T", brl) } host, err := netlink.LinkByName(n.HostInterfaceName) if err != nil { return err } if err := netlink.LinkSetMaster(host, br); err != nil { return err } if err := netlink.LinkSetMTU(host, n.Mtu); err != nil { return err } if n.HairpinMode { if err := netlink.LinkSetHairpin(host, true); err != nil { return err } } if err := netlink.LinkSetUp(host); err != nil { return err } return nil } func (v *veth) create(n *network, nspid int) (err error) { tmpName, err := v.generateTempPeerName() if err != nil { return err } n.TempVethPeerName = tmpName if n.Bridge == "" { return fmt.Errorf("bridge is not specified") } veth := &netlink.Veth{ LinkAttrs: netlink.LinkAttrs{ Name: n.HostInterfaceName, TxQLen: n.TxQueueLen, }, PeerName: n.TempVethPeerName, } if err := netlink.LinkAdd(veth); err != nil { return err } defer func() { if err != nil { netlink.LinkDel(veth) } }() if err := v.attach(&n.Network); err != nil { return err } child, err := netlink.LinkByName(n.TempVethPeerName) if err != nil { return err } return netlink.LinkSetNsPid(child, nspid) } func (v *veth) 
generateTempPeerName() (string, error) { return utils.GenerateRandomName("veth", 7) } func (v *veth) initialize(config *network) error { peer := config.TempVethPeerName if peer == "" { return fmt.Errorf("peer is not specified") } child, err := netlink.LinkByName(peer) if err != nil { return err } if err := netlink.LinkSetDown(child); err != nil { return err } if err := netlink.LinkSetName(child, config.Name); err != nil { return err } // get the interface again after we changed the name as the index also changes. if child, err = netlink.LinkByName(config.Name); err != nil { return err } if config.MacAddress != "" { mac, err := net.ParseMAC(config.MacAddress) if err != nil { return err } if err := netlink.LinkSetHardwareAddr(child, mac); err != nil { return err } } ip, err := netlink.ParseAddr(config.Address) if err != nil { return err } if err := netlink.AddrAdd(child, ip); err != nil { return err } if config.IPv6Address != "" { ip6, err := netlink.ParseAddr(config.IPv6Address) if err != nil { return err } if err := netlink.AddrAdd(child, ip6); err != nil { return err } } if err := netlink.LinkSetMTU(child, config.Mtu); err != nil { return err } if err := netlink.LinkSetUp(child); err != nil { return err } if config.Gateway != "" { gw := net.ParseIP(config.Gateway) if err := netlink.RouteAdd(&netlink.Route{ Scope: netlink.SCOPE_UNIVERSE, LinkIndex: child.Attrs().Index, Gw: gw, }); err != nil { return err } } if config.IPv6Gateway != "" { gw := net.ParseIP(config.IPv6Gateway) if err := netlink.RouteAdd(&netlink.Route{ Scope: netlink.SCOPE_UNIVERSE, LinkIndex: child.Attrs().Index, Gw: gw, }); err != nil { return err } } return nil } docker-runc-tags-docker-1.13.1/libcontainer/notify_linux.go000066400000000000000000000041441304443252500237460ustar00rootroot00000000000000// +build linux package libcontainer import ( "fmt" "io/ioutil" "os" "path/filepath" "syscall" ) const oomCgroupName = "memory" type PressureLevel uint const ( LowPressure PressureLevel = iota 
MediumPressure CriticalPressure ) func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) { evFile, err := os.Open(filepath.Join(cgDir, evName)) if err != nil { return nil, err } fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0) if syserr != 0 { evFile.Close() return nil, syserr } eventfd := os.NewFile(fd, "eventfd") eventControlPath := filepath.Join(cgDir, "cgroup.event_control") data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg) if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil { eventfd.Close() evFile.Close() return nil, err } ch := make(chan struct{}) go func() { defer func() { close(ch) eventfd.Close() evFile.Close() }() buf := make([]byte, 8) for { if _, err := eventfd.Read(buf); err != nil { return } // When a cgroup is destroyed, an event is sent to eventfd. // So if the control path is gone, return instead of notifying. if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { return } ch <- struct{}{} } }() return ch, nil } // notifyOnOOM returns channel on which you can expect event about OOM, // if process died without OOM this channel will be closed. 
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) { dir := paths[oomCgroupName] if dir == "" { return nil, fmt.Errorf("path %q missing", oomCgroupName) } return registerMemoryEvent(dir, "memory.oom_control", "") } func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) { dir := paths[oomCgroupName] if dir == "" { return nil, fmt.Errorf("path %q missing", oomCgroupName) } if level > CriticalPressure { return nil, fmt.Errorf("invalid pressure level %d", level) } levelStr := []string{"low", "medium", "critical"}[level] return registerMemoryEvent(dir, "memory.pressure_level", levelStr) } docker-runc-tags-docker-1.13.1/libcontainer/notify_linux_test.go000066400000000000000000000060451304443252500250070ustar00rootroot00000000000000// +build linux package libcontainer import ( "encoding/binary" "fmt" "io/ioutil" "os" "path/filepath" "syscall" "testing" "time" ) type notifyFunc func(paths map[string]string) (<-chan struct{}, error) func testMemoryNotification(t *testing.T, evName string, notify notifyFunc, targ string) { memoryPath, err := ioutil.TempDir("", "testmemnotification-"+evName) if err != nil { t.Fatal(err) } evFile := filepath.Join(memoryPath, evName) eventPath := filepath.Join(memoryPath, "cgroup.event_control") if err := ioutil.WriteFile(evFile, []byte{}, 0700); err != nil { t.Fatal(err) } if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil { t.Fatal(err) } paths := map[string]string{ "memory": memoryPath, } ch, err := notify(paths) if err != nil { t.Fatal("expected no error, got:", err) } data, err := ioutil.ReadFile(eventPath) if err != nil { t.Fatal("couldn't read event control file:", err) } var eventFd, evFd int var arg string if targ != "" { _, err = fmt.Sscanf(string(data), "%d %d %s", &eventFd, &evFd, &arg) } else { _, err = fmt.Sscanf(string(data), "%d %d", &eventFd, &evFd) } if err != nil || arg != targ { t.Fatalf("invalid control data %q: %s", data, err) } // re-open the eventfd 
efd, err := syscall.Dup(eventFd) if err != nil { t.Fatal("unable to reopen eventfd:", err) } defer syscall.Close(efd) if err != nil { t.Fatal("unable to dup event fd:", err) } buf := make([]byte, 8) binary.LittleEndian.PutUint64(buf, 1) if _, err := syscall.Write(efd, buf); err != nil { t.Fatal("unable to write to eventfd:", err) } select { case <-ch: case <-time.After(100 * time.Millisecond): t.Fatal("no notification on channel after 100ms") } // simulate what happens when a cgroup is destroyed by cleaning up and then // writing to the eventfd. if err := os.RemoveAll(memoryPath); err != nil { t.Fatal(err) } if _, err := syscall.Write(efd, buf); err != nil { t.Fatal("unable to write to eventfd:", err) } // give things a moment to shut down select { case _, ok := <-ch: if ok { t.Fatal("expected no notification to be triggered") } case <-time.After(100 * time.Millisecond): } if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(evFd), syscall.F_GETFD, 0); err != syscall.EBADF { t.Error("expected event control to be closed") } if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(eventFd), syscall.F_GETFD, 0); err != syscall.EBADF { t.Error("expected event fd to be closed") } } func TestNotifyOnOOM(t *testing.T) { f := func(paths map[string]string) (<-chan struct{}, error) { return notifyOnOOM(paths) } testMemoryNotification(t, "memory.oom_control", f, "") } func TestNotifyMemoryPressure(t *testing.T) { tests := map[PressureLevel]string{ LowPressure: "low", MediumPressure: "medium", CriticalPressure: "critical", } for level, arg := range tests { f := func(paths map[string]string) (<-chan struct{}, error) { return notifyMemoryPressure(paths, level) } testMemoryNotification(t, "memory.pressure_level", f, arg) } } 
docker-runc-tags-docker-1.13.1/libcontainer/nsenter/000077500000000000000000000000001304443252500223435ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/nsenter/README.md000066400000000000000000000043371304443252500236310ustar00rootroot00000000000000## nsenter The `nsenter` package registers a special init constructor that is called before the Go runtime has a chance to boot. This provides us the ability to `setns` on existing namespaces and avoid the issues that the Go runtime has with multiple threads. This constructor will be called if this package is registered, imported, in your go application. The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd/cgo/) package. In cgo, if the import of "C" is immediately preceded by a comment, that comment, called the preamble, is used as a header when compiling the C parts of the package. So every time we import package `nsenter`, the C code function `nsexec()` would be called. And package `nsenter` is now only imported in `main_unix.go`, so every time before we call `cmd.Start` on linux, that C code would run. Because `nsexec()` must be run before the Go runtime in order to use the Linux kernel namespace, you must `import` this library into a package if you plan to use `libcontainer` directly. Otherwise Go will not execute the `nsexec()` constructor, which means that the re-exec will not cause the namespaces to be joined. You can import it like this: ```go import _ "github.com/opencontainers/runc/libcontainer/nsenter" ``` `nsexec()` will first get the file descriptor number for the init pipe from the environment variable `_LIBCONTAINER_INITPIPE` (which was opened by the parent and kept open across the fork-exec of the `nsexec()` init process). The init pipe is used to read bootstrap data (namespace paths, clone flags, uid and gid mappings, and the console path) from the parent process. 
`nsexec()` will then call `setns(2)` to join the namespaces provided in the bootstrap data (if available), `clone(2)` a child process with the provided clone flags, update the user and group ID mappings, do some further miscellaneous setup steps, and then send the PID of the child process to the parent of the `nsexec()` "caller". Finally, the parent `nsexec()` will exit and the child `nsexec()` process will return to allow the Go runtime take over. NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any CLONE_NEW* clone flags because we must fork a new process in order to enter the PID namespace. docker-runc-tags-docker-1.13.1/libcontainer/nsenter/namespace.h000066400000000000000000000015111304443252500244460ustar00rootroot00000000000000#ifndef NSENTER_NAMESPACE_H #define NSENTER_NAMESPACE_H #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include /* All of these are taken from include/uapi/linux/sched.h */ #ifndef CLONE_NEWNS # define CLONE_NEWNS 0x00020000 /* New mount namespace group */ #endif #ifndef CLONE_NEWCGROUP # define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ #endif #ifndef CLONE_NEWUTS # define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ #endif #ifndef CLONE_NEWIPC # define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ #endif #ifndef CLONE_NEWUSER # define CLONE_NEWUSER 0x10000000 /* New user namespace */ #endif #ifndef CLONE_NEWPID # define CLONE_NEWPID 0x20000000 /* New pid namespace */ #endif #ifndef CLONE_NEWNET # define CLONE_NEWNET 0x40000000 /* New network namespace */ #endif #endif /* NSENTER_NAMESPACE_H */ docker-runc-tags-docker-1.13.1/libcontainer/nsenter/nsenter.go000066400000000000000000000002371304443252500243520ustar00rootroot00000000000000// +build linux,!gccgo package nsenter /* #cgo CFLAGS: -Wall extern void nsexec(); void __attribute__((constructor)) init(void) { nsexec(); } */ import "C" 
docker-runc-tags-docker-1.13.1/libcontainer/nsenter/nsenter_gccgo.go000066400000000000000000000007661304443252500255230ustar00rootroot00000000000000// +build linux,gccgo package nsenter /* #cgo CFLAGS: -Wall extern void nsexec(); void __attribute__((constructor)) init(void) { nsexec(); } */ import "C" // AlwaysFalse is here to stay false // (and be exported so the compiler doesn't optimize out its reference) var AlwaysFalse bool func init() { if AlwaysFalse { // by referencing this C init() in a noop test, it will ensure the compiler // links in the C function. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134 C.init() } } docker-runc-tags-docker-1.13.1/libcontainer/nsenter/nsenter_test.go000066400000000000000000000077501304443252500254200ustar00rootroot00000000000000package nsenter import ( "bytes" "encoding/json" "fmt" "io" "io/ioutil" "os" "os/exec" "strings" "syscall" "testing" "github.com/opencontainers/runc/libcontainer" "github.com/vishvananda/netlink/nl" ) type pid struct { Pid int `json:"Pid"` } func TestNsenterValidPaths(t *testing.T) { args := []string{"nsenter-exec"} parent, child, err := newPipe() if err != nil { t.Fatalf("failed to create pipe %v", err) } namespaces := []string{ // join pid ns of the current process fmt.Sprintf("pid:/proc/%d/ns/pid", os.Getpid()), } cmd := &exec.Cmd{ Path: os.Args[0], Args: args, ExtraFiles: []*os.File{child}, Env: []string{"_LIBCONTAINER_INITPIPE=3"}, Stdout: os.Stdout, Stderr: os.Stderr, } if err := cmd.Start(); err != nil { t.Fatalf("nsenter failed to start %v", err) } // write cloneFlags r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) r.AddData(&libcontainer.Int32msg{ Type: libcontainer.CloneFlagsAttr, Value: uint32(syscall.CLONE_NEWNET), }) r.AddData(&libcontainer.Bytemsg{ Type: libcontainer.NsPathsAttr, Value: []byte(strings.Join(namespaces, ",")), }) if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { t.Fatal(err) } decoder := json.NewDecoder(parent) var pid *pid if err := 
cmd.Wait(); err != nil { t.Fatalf("nsenter exits with a non-zero exit status") } if err := decoder.Decode(&pid); err != nil { dir, _ := ioutil.ReadDir(fmt.Sprintf("/proc/%d/ns", os.Getpid())) for _, d := range dir { t.Log(d.Name()) } t.Fatalf("%v", err) } p, err := os.FindProcess(pid.Pid) if err != nil { t.Fatalf("%v", err) } p.Wait() } func TestNsenterInvalidPaths(t *testing.T) { args := []string{"nsenter-exec"} parent, child, err := newPipe() if err != nil { t.Fatalf("failed to create pipe %v", err) } namespaces := []string{ // join pid ns of the current process fmt.Sprintf("pid:/proc/%d/ns/pid", -1), } cmd := &exec.Cmd{ Path: os.Args[0], Args: args, ExtraFiles: []*os.File{child}, Env: []string{"_LIBCONTAINER_INITPIPE=3"}, } if err := cmd.Start(); err != nil { t.Fatal(err) } // write cloneFlags r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) r.AddData(&libcontainer.Int32msg{ Type: libcontainer.CloneFlagsAttr, Value: uint32(syscall.CLONE_NEWNET), }) r.AddData(&libcontainer.Bytemsg{ Type: libcontainer.NsPathsAttr, Value: []byte(strings.Join(namespaces, ",")), }) if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { t.Fatal(err) } if err := cmd.Wait(); err == nil { t.Fatalf("nsenter exits with a zero exit status") } } func TestNsenterIncorrectPathType(t *testing.T) { args := []string{"nsenter-exec"} parent, child, err := newPipe() if err != nil { t.Fatalf("failed to create pipe %v", err) } namespaces := []string{ // join pid ns of the current process fmt.Sprintf("net:/proc/%d/ns/pid", os.Getpid()), } cmd := &exec.Cmd{ Path: os.Args[0], Args: args, ExtraFiles: []*os.File{child}, Env: []string{"_LIBCONTAINER_INITPIPE=3"}, } if err := cmd.Start(); err != nil { t.Fatal(err) } // write cloneFlags r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) r.AddData(&libcontainer.Int32msg{ Type: libcontainer.CloneFlagsAttr, Value: uint32(syscall.CLONE_NEWNET), }) r.AddData(&libcontainer.Bytemsg{ Type: libcontainer.NsPathsAttr, Value: 
[]byte(strings.Join(namespaces, ",")), }) if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { t.Fatal(err) } if err := cmd.Wait(); err == nil { t.Fatalf("nsenter exits with a zero exit status") } } func init() { if strings.HasPrefix(os.Args[0], "nsenter-") { os.Exit(0) } return } func newPipe() (parent *os.File, child *os.File, err error) { fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) if err != nil { return nil, nil, err } return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil } docker-runc-tags-docker-1.13.1/libcontainer/nsenter/nsenter_unsupported.go000066400000000000000000000000631304443252500270170ustar00rootroot00000000000000// +build !linux !cgo package nsenter import "C" docker-runc-tags-docker-1.13.1/libcontainer/nsenter/nsexec.c000066400000000000000000000520351304443252500240010ustar00rootroot00000000000000#define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Get all of the CLONE_NEW* flags. */ #include "namespace.h" /* Synchronisation values. */ enum sync_t { SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */ SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */ SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */ SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */ /* XXX: This doesn't help with segfaults and other such issues. */ SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ }; /* longjmp() arguments. */ #define JUMP_PARENT 0x00 #define JUMP_CHILD 0xA0 #define JUMP_INIT 0xA1 /* JSON buffer. */ #define JSON_MAX 4096 /* Assume the stack grows down, so arguments should be above it. 
*/ struct clone_t { /* * Reserve some space for clone() to locate arguments * and retcode in this place */ char stack[4096] __attribute__ ((aligned(16))); char stack_ptr[0]; /* There's two children. This is used to execute the different code. */ jmp_buf *env; int jmpval; }; struct nlconfig_t { char *data; uint32_t cloneflags; char *uidmap; size_t uidmap_len; char *gidmap; size_t gidmap_len; char *namespaces; size_t namespaces_len; uint8_t is_setgroup; int consolefd; }; /* * List of netlink message types sent to us as part of bootstrapping the init. * These constants are defined in libcontainer/message_linux.go. */ #define INIT_MSG 62000 #define CLONE_FLAGS_ATTR 27281 #define CONSOLE_PATH_ATTR 27282 #define NS_PATHS_ATTR 27283 #define UIDMAP_ATTR 27284 #define GIDMAP_ATTR 27285 #define SETGROUP_ATTR 27286 /* * Use the raw syscall for versions of glibc which don't include a function for * it, namely (glibc 2.12). */ #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 # define _GNU_SOURCE # include "syscall.h" # if !defined(SYS_setns) && defined(__NR_setns) # define SYS_setns __NR_setns # endif #ifndef SYS_setns # error "setns(2) syscall not supported by glibc version" #endif int setns(int fd, int nstype) { return syscall(SYS_setns, fd, nstype); } #endif /* XXX: This is ugly. */ static int syncfd = -1; /* TODO(cyphar): Fix this so it correctly deals with syncT. */ #define bail(fmt, ...) \ do { \ int ret = __COUNTER__ + 1; \ fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \ if (syncfd >= 0) { \ enum sync_t s = SYNC_ERR; \ if (write(syncfd, &s, sizeof(s)) != sizeof(s)) \ fprintf(stderr, "nsenter: failed: write(s)"); \ if (write(syncfd, &ret, sizeof(ret)) != sizeof(ret)) \ fprintf(stderr, "nsenter: failed: write(ret)"); \ } \ exit(ret); \ } while(0) static int write_file(char *data, size_t data_len, char *pathfmt, ...) 
{ int fd, len, ret = 0; char path[PATH_MAX]; va_list ap; va_start(ap, pathfmt); len = vsnprintf(path, PATH_MAX, pathfmt, ap); va_end(ap); if (len < 0) return -1; fd = open(path, O_RDWR); if (fd < 0) { ret = -1; goto out; } len = write(fd, data, data_len); if (len != data_len) { ret = -1; goto out; } out: close(fd); return ret; } enum policy_t { SETGROUPS_DEFAULT = 0, SETGROUPS_ALLOW, SETGROUPS_DENY, }; /* This *must* be called before we touch gid_map. */ static void update_setgroups(int pid, enum policy_t setgroup) { char *policy; switch (setgroup) { case SETGROUPS_ALLOW: policy = "allow"; break; case SETGROUPS_DENY: policy = "deny"; break; case SETGROUPS_DEFAULT: /* Nothing to do. */ return; } if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) { /* * If the kernel is too old to support /proc/pid/setgroups, * open(2) or write(2) will return ENOENT. This is fine. */ if (errno != ENOENT) bail("failed to write '%s' to /proc/%d/setgroups", policy, pid); } } static void update_uidmap(int pid, char *map, int map_len) { if (map == NULL || map_len <= 0) return; if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) bail("failed to update /proc/%d/uid_map", pid); } static void update_gidmap(int pid, char *map, int map_len) { if (map == NULL || map_len <= 0) return; if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) bail("failed to update /proc/%d/gid_map", pid); } /* A dummy function that just jumps to the given jumpval. 
*/ static int child_func(void *arg) __attribute__ ((noinline)); static int child_func(void *arg) { struct clone_t *ca = (struct clone_t *)arg; longjmp(*ca->env, ca->jmpval); } static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline)); static int clone_parent(jmp_buf *env, int jmpval) { struct clone_t ca = { .env = env, .jmpval = jmpval, }; return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca); } /* * Gets the init pipe fd from the environment, which is used to read the * bootstrap data and tell the parent what the new pid is after we finish * setting up the environment. */ static int initpipe(void) { int pipenum; char *initpipe, *endptr; initpipe = getenv("_LIBCONTAINER_INITPIPE"); if (initpipe == NULL || *initpipe == '\0') return -1; pipenum = strtol(initpipe, &endptr, 10); if (*endptr != '\0') bail("unable to parse _LIBCONTAINER_INITPIPE"); return pipenum; } /* Returns the clone(2) flag for a namespace, given the name of a namespace. */ static int nsflag(char *name) { if (!strcmp(name, "cgroup")) return CLONE_NEWCGROUP; else if (!strcmp(name, "ipc")) return CLONE_NEWIPC; else if (!strcmp(name, "mnt")) return CLONE_NEWNS; else if (!strcmp(name, "net")) return CLONE_NEWNET; else if (!strcmp(name, "pid")) return CLONE_NEWPID; else if (!strcmp(name, "user")) return CLONE_NEWUSER; else if (!strcmp(name, "uts")) return CLONE_NEWUTS; /* If we don't recognise a name, fallback to 0. */ return 0; } static uint32_t readint32(char *buf) { return *(uint32_t *) buf; } static uint8_t readint8(char *buf) { return *(uint8_t *) buf; } static void nl_parse(int fd, struct nlconfig_t *config) { size_t len, size; struct nlmsghdr hdr; char *data, *current; /* Retrieve the netlink header. 
*/ len = read(fd, &hdr, NLMSG_HDRLEN); if (len != NLMSG_HDRLEN) bail("invalid netlink header length %lu", len); if (hdr.nlmsg_type == NLMSG_ERROR) bail("failed to read netlink message"); if (hdr.nlmsg_type != INIT_MSG) bail("unexpected msg type %d", hdr.nlmsg_type); /* Retrieve data. */ size = NLMSG_PAYLOAD(&hdr, 0); current = data = malloc(size); if (!data) bail("failed to allocate %zu bytes of memory for nl_payload", size); len = read(fd, data, size); if (len != size) bail("failed to read netlink payload, %lu != %lu", len, size); /* Parse the netlink payload. */ config->data = data; config->consolefd = -1; while (current < data + size) { struct nlattr *nlattr = (struct nlattr *)current; size_t payload_len = nlattr->nla_len - NLA_HDRLEN; /* Advance to payload. */ current += NLA_HDRLEN; /* Handle payload. */ switch (nlattr->nla_type) { case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; case CONSOLE_PATH_ATTR: /* * We open the console here because we currently evaluate console * paths from the *host* namespaces. 
*/ config->consolefd = open(current, O_RDWR); if (config->consolefd < 0) bail("failed to open console %s", current); break; case NS_PATHS_ATTR: config->namespaces = current; config->namespaces_len = payload_len; break; case UIDMAP_ATTR: config->uidmap = current; config->uidmap_len = payload_len; break; case GIDMAP_ATTR: config->gidmap = current; config->gidmap_len = payload_len; break; case SETGROUP_ATTR: config->is_setgroup = readint8(current); break; default: bail("unknown netlink message type %d", nlattr->nla_type); } current += NLA_ALIGN(payload_len); } } void nl_free(struct nlconfig_t *config) { free(config->data); } void join_namespaces(char *nslist) { int num = 0, i; char *saveptr = NULL; char *namespace = strtok_r(nslist, ",", &saveptr); struct namespace_t { int fd; int ns; char type[PATH_MAX]; char path[PATH_MAX]; } *namespaces = NULL; if (!namespace || !strlen(namespace) || !strlen(nslist)) bail("ns paths are empty"); /* * We have to open the file descriptors first, since after * we join the mnt namespace we might no longer be able to * access the paths. */ do { int fd; char *path; struct namespace_t *ns; /* Resize the namespace array. */ namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t)); if (!namespaces) bail("failed to reallocate namespace array"); ns = &namespaces[num - 1]; /* Split 'ns:path'. */ path = strstr(namespace, ":"); if (!path) bail("failed to parse %s", namespace); *path++ = '\0'; fd = open(path, O_RDONLY); if (fd < 0) bail("failed to open %s", namespace); ns->fd = fd; ns->ns = nsflag(namespace); strncpy(ns->path, path, PATH_MAX); } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); /* * The ordering in which we join namespaces is important. We should * always join the user namespace *first*. This is all guaranteed * from the container_linux.go side of this, so we're just going to * follow the order given to us. 
*/ for (i = 0; i < num; i++) { struct namespace_t ns = namespaces[i]; if (setns(ns.fd, ns.ns) < 0) bail("failed to setns to %s", ns.path); close(ns.fd); } free(namespaces); } void nsexec(void) { int pipenum; jmp_buf env; int syncpipe[2]; struct nlconfig_t config = {0}; /* * If we don't have an init pipe, just return to the go routine. * We'll only get an init pipe for start or exec. */ pipenum = initpipe(); if (pipenum == -1) return; /* make the process non-dumpable */ if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) { bail("failed to set process as non-dumpable"); } /* Parse all of the netlink configuration. */ nl_parse(pipenum, &config); /* Pipe so we can tell the child when we've finished setting up. */ if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0) bail("failed to setup sync pipe between parent and child"); /* TODO: Currently we aren't dealing with child deaths properly. */ /* * Okay, so this is quite annoying. * * In order for this unsharing code to be more extensible we need to split * up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case * would be if we did clone(CLONE_NEWUSER) and the other namespaces * separately, but because of SELinux issues we cannot really do that. But * we cannot just dump the namespace flags into clone(...) because several * usecases (such as rootless containers) require more granularity around * the namespace setup. In addition, some older kernels had issues where * CLONE_NEWUSER wasn't handled before other namespaces (but we cannot * handle this while also dealing with SELinux so we choose SELinux support * over broken kernel support). * * However, if we unshare(2) the user namespace *before* we clone(2), then * all hell breaks loose. * * The parent no longer has permissions to do many things (unshare(2) drops * all capabilities in your old namespace), and the container cannot be set * up to have more than one {uid,gid} mapping. This is obviously less than * ideal. 
In order to fix this, we have to first clone(2) and then unshare. * * Unfortunately, it's not as simple as that. We have to fork to enter the * PID namespace (the PID namespace only applies to children). Since we'll * have to double-fork, this clone_parent() call won't be able to get the * PID of the _actual_ init process (without doing more synchronisation than * I can deal with at the moment). So we'll just get the parent to send it * for us, the only job of this process is to update * /proc/pid/{setgroups,uid_map,gid_map}. * * And as a result of the above, we also need to setns(2) in the first child * because if we join a PID namespace in the topmost parent then our child * will be in that namespace (and it will not be able to give us a PID value * that makes sense without resorting to sending things with cmsg). * * This also deals with an older issue caused by dumping cloneflags into * clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so * we have to unshare(2) before clone(2) in order to do this. This was fixed * in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was * introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're * aware, the last mainline kernel which had this bug was Linux 3.12. * However, we cannot comment on which kernels the broken patch was * backported to. * * -- Aleksa "what has my life come to?" Sarai */ switch (setjmp(env)) { /* * Stage 0: We're in the parent. Our job is just to create a new child * (stage 1: JUMP_CHILD) process and write its uid_map and * gid_map. That process will go on to create a new process, then * it will send us its PID which we will send to the bootstrap * process. */ case JUMP_PARENT: { int len; pid_t child; char buf[JSON_MAX]; /* For debugging. */ prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0); /* Start the process of getting a container. 
*/ child = clone_parent(&env, JUMP_CHILD); if (child < 0) bail("unable to fork: child_func"); /* State machine for synchronisation with the children. */ while (true) { enum sync_t s; /* This doesn't need to be global, we're in the parent. */ int syncfd = syncpipe[1]; if (read(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with child: next state"); switch (s) { case SYNC_ERR: { /* We have to mirror the error code of the child. */ int ret; if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) bail("failed to sync with child: read(error code)"); exit(ret); } break; case SYNC_USERMAP_PLS: /* Enable setgroups(2) if we've been asked to. */ if (config.is_setgroup) update_setgroups(child, SETGROUPS_ALLOW); /* Set up mappings. */ update_uidmap(child, config.uidmap, config.uidmap_len); update_gidmap(child, config.gidmap, config.gidmap_len); s = SYNC_USERMAP_ACK; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { kill(child, SIGKILL); bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); } break; case SYNC_USERMAP_ACK: /* We should _never_ receive acks. */ kill(child, SIGKILL); bail("failed to sync with child: unexpected SYNC_USERMAP_ACK"); break; case SYNC_RECVPID_PLS: { pid_t old = child; /* Get the init_func pid. */ if (read(syncfd, &child, sizeof(child)) != sizeof(child)) { kill(old, SIGKILL); bail("failed to sync with child: read(childpid)"); } /* Send ACK. */ s = SYNC_RECVPID_ACK; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { kill(old, SIGKILL); kill(child, SIGKILL); bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); } } /* Leave the loop. */ goto out; case SYNC_RECVPID_ACK: /* We should _never_ receive acks. */ kill(child, SIGKILL); bail("failed to sync with child: unexpected SYNC_RECVPID_ACK"); break; } } out: /* Send the init_func pid back to our parent. 
*/ len = snprintf(buf, JSON_MAX, "{\"pid\": %d}\n", child); if (len < 0) { kill(child, SIGKILL); bail("unable to generate JSON for child pid"); } if (write(pipenum, buf, len) != len) { kill(child, SIGKILL); bail("unable to send child pid to bootstrapper"); } exit(0); } /* * Stage 1: We're in the first child process. Our job is to join any * provided namespaces in the netlink payload and unshare all * of the requested namespaces. If we've been asked to * CLONE_NEWUSER, we will ask our parent (stage 0) to set up * our user mappings for us. Then, we create a new child * (stage 2: JUMP_INIT) for PID namespace. We then send the * child's PID to our parent (stage 0). */ case JUMP_CHILD: { pid_t child; enum sync_t s; /* We're in a child and thus need to tell the parent if we die. */ syncfd = syncpipe[0]; /* For debugging. */ prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0); /* * We need to setns first. We cannot do this earlier (in stage 0) * because of the fact that we forked to get here (the PID of * [stage 2: JUMP_INIT]) would be meaningless). We could send it * using cmsg(3) but that's just annoying. */ if (config.namespaces) join_namespaces(config.namespaces); /* * Unshare all of the namespaces. Now, it should be noted that this * ordering might break in the future (especially with rootless * containers). But for now, it's not possible to split this into * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. * * Note that we don't merge this with clone() because there were * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) * was broken, so we'll just do it the long way anyway. */ if (unshare(config.cloneflags) < 0) bail("failed to unshare namespaces"); /* * Deal with user namespaces first. They are quite special, as they * affect our ability to unshare other namespaces and are used as * context for privilege checks. 
*/ if (config.cloneflags & CLONE_NEWUSER) { /* * We don't have the privileges to do any mapping here (see the * clone_parent rant). So signal our parent to hook us up. */ s = SYNC_USERMAP_PLS; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); /* ... wait for mapping ... */ if (read(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); if (s != SYNC_USERMAP_ACK) bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); } /* * TODO: What about non-namespace clone flags that we're dropping here? * * We fork again because of PID namespace, setns(2) or unshare(2) don't * change the PID namespace of the calling process, because doing so * would change the caller's idea of its own PID (as reported by getpid()), * which would break many applications and libraries, so we must fork * to actually enter the new PID namespace. */ child = clone_parent(&env, JUMP_INIT); if (child < 0) bail("unable to fork: init_func"); /* Send the child to our parent, which knows what it's doing. */ s = SYNC_RECVPID_PLS; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { kill(child, SIGKILL); bail("failed to sync with parent: write(SYNC_RECVPID_PLS)"); } if (write(syncfd, &child, sizeof(child)) != sizeof(child)) { kill(child, SIGKILL); bail("failed to sync with parent: write(childpid)"); } /* ... wait for parent to get the pid ... */ if (read(syncfd, &s, sizeof(s)) != sizeof(s)) { kill(child, SIGKILL); bail("failed to sync with parent: read(SYNC_RECVPID_ACK)"); } if (s != SYNC_RECVPID_ACK) { kill(child, SIGKILL); bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); } /* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */ exit(0); } /* * Stage 2: We're the final child process, and the only process that will * actually return to the Go runtime. Our job is to just do the * final cleanup steps and then return to the Go runtime to allow * init_linux.go to run. 
*/ case JUMP_INIT: { /* * We're inside the child now, having jumped from the * start_child() code after forking in the parent. */ int consolefd = config.consolefd; /* We're in a child and thus need to tell the parent if we die. */ syncfd = syncpipe[0]; /* For debugging. */ prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0); if (setsid() < 0) bail("setsid failed"); if (setuid(0) < 0) bail("setuid failed"); if (setgid(0) < 0) bail("setgid failed"); if (setgroups(0, NULL) < 0) bail("setgroups failed"); if (consolefd != -1) { if (ioctl(consolefd, TIOCSCTTY, 0) < 0) bail("ioctl TIOCSCTTY failed"); if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) bail("failed to dup stdin"); if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) bail("failed to dup stdout"); if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) bail("failed to dup stderr"); } /* Close sync pipes. */ close(syncpipe[0]); close(syncpipe[1]); /* Free netlink data. */ nl_free(&config); /* Finish executing, let the Go runtime take over. */ return; } default: bail("unexpected jump value"); break; } /* Should never be reached. */ bail("should never be reached"); } docker-runc-tags-docker-1.13.1/libcontainer/process.go000066400000000000000000000070621304443252500226770ustar00rootroot00000000000000package libcontainer import ( "fmt" "io" "math" "os" "github.com/opencontainers/runc/libcontainer/configs" ) type processOperations interface { wait() (*os.ProcessState, error) signal(sig os.Signal) error pid() int } // Process specifies the configuration and IO for a process inside // a container. type Process struct { // The command to be run followed by any arguments. Args []string // Env specifies the environment variables for the process. Env []string // User will set the uid and gid of the executing process running inside the container // local to the container's user and group configuration. 
User string // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. AdditionalGroups []string // Cwd will change the processes current working directory inside the container's rootfs. Cwd string // Stdin is a pointer to a reader which provides the standard input stream. Stdin io.Reader // Stdout is a pointer to a writer which receives the standard output stream. Stdout io.Writer // Stderr is a pointer to a writer which receives the standard error stream. Stderr io.Writer // ExtraFiles specifies additional open files to be inherited by the container ExtraFiles []*os.File // consolePath is the path to the console allocated to the container. consolePath string // Capabilities specify the capabilities to keep when executing the process inside the container // All capabilities not specified will be dropped from the processes capability mask Capabilities []string // AppArmorProfile specifies the profile to apply to the process and is // changed at the time the process is execed AppArmorProfile string // Label specifies the label to apply to the process. It is commonly used by selinux Label string // NoNewPrivileges controls whether processes can gain additional privileges. NoNewPrivileges *bool // Rlimits specifies the resource limits, such as max open files, to set in the container // If Rlimits are not set, the container will inherit rlimits from the parent process Rlimits []configs.Rlimit ops processOperations } // Wait waits for the process to exit. // Wait releases any resources associated with the Process func (p Process) Wait() (*os.ProcessState, error) { if p.ops == nil { return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps) } return p.ops.wait() } // Pid returns the process ID func (p Process) Pid() (int, error) { // math.MinInt32 is returned here, because it's invalid value // for the kill() system call. 
if p.ops == nil { return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps) } return p.ops.pid(), nil } // Signal sends a signal to the Process. func (p Process) Signal(sig os.Signal) error { if p.ops == nil { return newGenericError(fmt.Errorf("invalid process"), NoProcessOps) } return p.ops.signal(sig) } // IO holds the process's STDIO type IO struct { Stdin io.WriteCloser Stdout io.ReadCloser Stderr io.ReadCloser } // NewConsole creates new console for process and returns it func (p *Process) NewConsole(rootuid, rootgid int) (Console, error) { console, err := NewConsole(rootuid, rootgid) if err != nil { return nil, err } p.consolePath = console.Path() return console, nil } // ConsoleFromPath sets the process's console with the path provided func (p *Process) ConsoleFromPath(path string) error { if p.consolePath != "" { return newGenericError(fmt.Errorf("console path already exists for process"), ConsoleExists) } p.consolePath = path return nil } docker-runc-tags-docker-1.13.1/libcontainer/process_linux.go000066400000000000000000000325571304443252500241250ustar00rootroot00000000000000// +build linux package libcontainer import ( "encoding/json" "errors" "fmt" "io" "os" "os/exec" "path/filepath" "strconv" "syscall" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" ) type parentProcess interface { // pid returns the pid for the running process. pid() int // start starts the process execution. start() error // send a SIGKILL to the process and wait for the exit. terminate() error // wait waits on the process returning the process state. wait() (*os.ProcessState, error) // startTime returns the process start time. 
startTime() (string, error) signal(os.Signal) error externalDescriptors() []string setExternalDescriptors(fds []string) } type setnsProcess struct { cmd *exec.Cmd parentPipe *os.File childPipe *os.File cgroupPaths map[string]string config *initConfig fds []string process *Process bootstrapData io.Reader rootDir *os.File } func (p *setnsProcess) startTime() (string, error) { return system.GetProcessStartTime(p.pid()) } func (p *setnsProcess) signal(sig os.Signal) error { s, ok := sig.(syscall.Signal) if !ok { return errors.New("os: unsupported signal type") } return syscall.Kill(p.pid(), s) } func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() err = p.cmd.Start() p.childPipe.Close() p.rootDir.Close() if err != nil { return newSystemErrorWithCause(err, "starting setns process") } if p.bootstrapData != nil { if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { return newSystemErrorWithCause(err, "copying bootstrap data to pipe") } } if err = p.execSetns(); err != nil { return newSystemErrorWithCause(err, "executing setns process") } if len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } } // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting oom score") } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting rlimits for process") } if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { return newSystemErrorWithCause(err, "writing config to pipe") } if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemErrorWithCause(err, "calling shutdown on init pipe") } // wait for the child process to fully complete and 
receive an error message // if one was encoutered var ierr *genericError if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemErrorWithCause(err, "decoding init error from pipe") } // Must be done after Shutdown so the child will exit and we can wait for it. if ierr != nil { p.wait() return ierr } return nil } // execSetns runs the process that executes C code to perform the setns calls // because setns support requires the C process to fork off a child and perform the setns // before the go runtime boots, we wait on the process to die and receive the child's pid // over the provided pipe. func (p *setnsProcess) execSetns() error { status, err := p.cmd.Process.Wait() if err != nil { p.cmd.Wait() return newSystemErrorWithCause(err, "waiting on setns process to finish") } if !status.Success() { p.cmd.Wait() return newSystemError(&exec.ExitError{ProcessState: status}) } var pid *pid if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { p.cmd.Wait() return newSystemErrorWithCause(err, "reading pid from init pipe") } process, err := os.FindProcess(pid.Pid) if err != nil { return err } p.cmd.Process = process p.process.ops = p return nil } // terminate sends a SIGKILL to the forked process for the setns routine then waits to // avoid the process becoming a zombie. 
func (p *setnsProcess) terminate() error { if p.cmd.Process == nil { return nil } err := p.cmd.Process.Kill() if _, werr := p.wait(); err == nil { err = werr } return err } func (p *setnsProcess) wait() (*os.ProcessState, error) { err := p.cmd.Wait() // Return actual ProcessState even on Wait error return p.cmd.ProcessState, err } func (p *setnsProcess) pid() int { return p.cmd.Process.Pid } func (p *setnsProcess) externalDescriptors() []string { return p.fds } func (p *setnsProcess) setExternalDescriptors(newFds []string) { p.fds = newFds } type initProcess struct { cmd *exec.Cmd parentPipe *os.File childPipe *os.File config *initConfig manager cgroups.Manager container *linuxContainer fds []string process *Process bootstrapData io.Reader sharePidns bool rootDir *os.File } func (p *initProcess) pid() int { return p.cmd.Process.Pid } func (p *initProcess) externalDescriptors() []string { return p.fds } // execSetns runs the process that executes C code to perform the setns calls // because setns support requires the C process to fork off a child and perform the setns // before the go runtime boots, we wait on the process to die and receive the child's pid // over the provided pipe. 
// This is called by initProcess.start function func (p *initProcess) execSetns() error { status, err := p.cmd.Process.Wait() if err != nil { p.cmd.Wait() return err } if !status.Success() { p.cmd.Wait() return &exec.ExitError{ProcessState: status} } var pid *pid if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { p.cmd.Wait() return err } process, err := os.FindProcess(pid.Pid) if err != nil { return err } p.cmd.Process = process p.process.ops = p return nil } func (p *initProcess) start() error { defer p.parentPipe.Close() err := p.cmd.Start() p.process.ops = p p.childPipe.Close() p.rootDir.Close() if err != nil { p.process.ops = nil return newSystemErrorWithCause(err, "starting init process command") } if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { return err } if err := p.execSetns(); err != nil { return newSystemErrorWithCause(err, "running exec setns process for init") } // Save the standard descriptor names before the container process // can potentially move them (e.g., via dup2()). If we don't do this now, // we won't know at checkpoint time which file descriptor to look up. 
fds, err := getPipeFds(p.pid()) if err != nil { return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) } p.setExternalDescriptors(fds) // Do this before syncing with child so that no children // can escape the cgroup if err := p.manager.Apply(p.pid()); err != nil { return newSystemErrorWithCause(err, "applying cgroup configuration for process") } defer func() { if err != nil { // TODO: should not be the responsibility to call here p.manager.Destroy() } }() if err := p.createNetworkInterfaces(); err != nil { return newSystemErrorWithCause(err, "creating network interfaces") } if err := p.sendConfig(); err != nil { return newSystemErrorWithCause(err, "sending config to init process") } var ( procSync syncT sentRun bool sentResume bool ierr *genericError ) dec := json.NewDecoder(p.parentPipe) loop: for { if err := dec.Decode(&procSync); err != nil { if err == io.EOF { break loop } return newSystemErrorWithCause(err, "decoding sync type from init pipe") } switch procSync.Type { case procReady: if err := p.manager.Set(p.config.Config); err != nil { return newSystemErrorWithCause(err, "setting cgroup config for ready process") } // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting oom score for ready process") } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting rlimits for ready process") } // call prestart hooks if !p.config.Config.Namespaces.Contains(configs.NEWNS) { if p.config.Config.Hooks != nil { s := configs.HookState{ Version: p.container.config.Version, ID: p.container.id, Pid: p.pid(), Root: p.config.Config.Rootfs, } for i, hook := range p.config.Config.Hooks.Prestart { if err := hook.Run(s); err != nil { return newSystemErrorWithCausef(err, "running prestart hook %d", i) 
} } } } // Sync with child. if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil { return newSystemErrorWithCause(err, "writing syncT run type") } sentRun = true case procHooks: if p.config.Config.Hooks != nil { s := configs.HookState{ Version: p.container.config.Version, ID: p.container.id, Pid: p.pid(), Root: p.config.Config.Rootfs, BundlePath: utils.SearchLabels(p.config.Config.Labels, "bundle"), } for i, hook := range p.config.Config.Hooks.Prestart { if err := hook.Run(s); err != nil { return newSystemErrorWithCausef(err, "running prestart hook %d", i) } } } // Sync with child. if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil { return newSystemErrorWithCause(err, "writing syncT resume type") } sentResume = true case procError: // wait for the child process to fully complete and receive an error message // if one was encoutered if err := dec.Decode(&ierr); err != nil && err != io.EOF { return newSystemErrorWithCause(err, "decoding proc error from init") } if ierr != nil { break loop } // Programmer error. panic("No error following JSON procError payload.") default: return newSystemError(fmt.Errorf("invalid JSON payload from child")) } } if !sentRun { return newSystemErrorWithCause(ierr, "container init") } if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume { return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process")) } if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemErrorWithCause(err, "shutting down init pipe") } // Must be done after Shutdown so the child will exit and we can wait for it. 
if ierr != nil { p.wait() return ierr } return nil } func (p *initProcess) wait() (*os.ProcessState, error) { err := p.cmd.Wait() if err != nil { return p.cmd.ProcessState, err } // we should kill all processes in cgroup when init is died if we use host PID namespace if p.sharePidns { signalAllProcesses(p.manager, syscall.SIGKILL) } return p.cmd.ProcessState, nil } func (p *initProcess) terminate() error { if p.cmd.Process == nil { return nil } err := p.cmd.Process.Kill() if _, werr := p.wait(); err == nil { err = werr } return err } func (p *initProcess) startTime() (string, error) { return system.GetProcessStartTime(p.pid()) } func (p *initProcess) sendConfig() error { // send the config to the container's init process, we don't use JSON Encode // here because there might be a problem in JSON decoder in some cases, see: // https://github.com/docker/docker/issues/14203#issuecomment-174177790 return utils.WriteJSON(p.parentPipe, p.config) } func (p *initProcess) createNetworkInterfaces() error { for _, config := range p.config.Config.Networks { strategy, err := getStrategy(config.Type) if err != nil { return err } n := &network{ Network: *config, } if err := strategy.create(n, p.pid()); err != nil { return err } p.config.Networks = append(p.config.Networks, n) } return nil } func (p *initProcess) signal(sig os.Signal) error { s, ok := sig.(syscall.Signal) if !ok { return errors.New("os: unsupported signal type") } return syscall.Kill(p.pid(), s) } func (p *initProcess) setExternalDescriptors(newFds []string) { p.fds = newFds } func getPipeFds(pid int) ([]string, error) { fds := make([]string, 3) dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") for i := 0; i < 3; i++ { f := filepath.Join(dirPath, strconv.Itoa(i)) target, err := os.Readlink(f) if err != nil { return fds, err } fds[i] = target } return fds, nil } // InitializeIO creates pipes for use with the process's STDIO // and returns the opposite side for each func (p *Process) InitializeIO(rootuid, 
rootgid int) (i *IO, err error) { var fds []uintptr i = &IO{} // cleanup in case of an error defer func() { if err != nil { for _, fd := range fds { syscall.Close(int(fd)) } } }() // STDIN r, w, err := os.Pipe() if err != nil { return nil, err } fds = append(fds, r.Fd(), w.Fd()) p.Stdin, i.Stdin = r, w // STDOUT if r, w, err = os.Pipe(); err != nil { return nil, err } fds = append(fds, r.Fd(), w.Fd()) p.Stdout, i.Stdout = w, r // STDERR if r, w, err = os.Pipe(); err != nil { return nil, err } fds = append(fds, r.Fd(), w.Fd()) p.Stderr, i.Stderr = w, r // change ownership of the pipes incase we are in a user namespace for _, fd := range fds { if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil { return nil, err } } return i, nil } docker-runc-tags-docker-1.13.1/libcontainer/restored_process.go000066400000000000000000000051751304443252500246110ustar00rootroot00000000000000// +build linux package libcontainer import ( "fmt" "os" "github.com/opencontainers/runc/libcontainer/system" ) func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) { var ( err error ) proc, err := os.FindProcess(pid) if err != nil { return nil, err } started, err := system.GetProcessStartTime(pid) if err != nil { return nil, err } return &restoredProcess{ proc: proc, processStartTime: started, fds: fds, }, nil } type restoredProcess struct { proc *os.Process processStartTime string fds []string } func (p *restoredProcess) start() error { return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) } func (p *restoredProcess) pid() int { return p.proc.Pid } func (p *restoredProcess) terminate() error { err := p.proc.Kill() if _, werr := p.wait(); err == nil { err = werr } return err } func (p *restoredProcess) wait() (*os.ProcessState, error) { // TODO: how do we wait on the actual process? 
// maybe use --exec-cmd in criu st, err := p.proc.Wait() if err != nil { return nil, err } return st, nil } func (p *restoredProcess) startTime() (string, error) { return p.processStartTime, nil } func (p *restoredProcess) signal(s os.Signal) error { return p.proc.Signal(s) } func (p *restoredProcess) externalDescriptors() []string { return p.fds } func (p *restoredProcess) setExternalDescriptors(newFds []string) { p.fds = newFds } // nonChildProcess represents a process where the calling process is not // the parent process. This process is created when a factory loads a container from // a persisted state. type nonChildProcess struct { processPid int processStartTime string fds []string } func (p *nonChildProcess) start() error { return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) } func (p *nonChildProcess) pid() int { return p.processPid } func (p *nonChildProcess) terminate() error { return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError) } func (p *nonChildProcess) wait() (*os.ProcessState, error) { return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) } func (p *nonChildProcess) startTime() (string, error) { return p.processStartTime, nil } func (p *nonChildProcess) signal(s os.Signal) error { proc, err := os.FindProcess(p.processPid) if err != nil { return err } return proc.Signal(s) } func (p *nonChildProcess) externalDescriptors() []string { return p.fds } func (p *nonChildProcess) setExternalDescriptors(newFds []string) { p.fds = newFds } docker-runc-tags-docker-1.13.1/libcontainer/rootfs_linux.go000066400000000000000000000541371304443252500237610ustar00rootroot00000000000000// +build linux package libcontainer import ( "fmt" "io" "io/ioutil" "os" "os/exec" "path" "path/filepath" "strings" "syscall" "time" "github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/symlink" "github.com/mrunalp/fileutils" 
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/label" "github.com/opencontainers/runc/libcontainer/system" libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" ) const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV // needsSetupDev returns true if /dev needs to be set up. func needsSetupDev(config *configs.Config) bool { for _, m := range config.Mounts { if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" { return false } } return true } // setupRootfs sets up the devices, mount points, and filesystems for use inside a // new mount namespace. func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) { if err := prepareRoot(config); err != nil { return newSystemErrorWithCause(err, "preparing rootfs") } setupDev := needsSetupDev(config) for _, m := range config.Mounts { for _, precmd := range m.PremountCmds { if err := mountCmd(precmd); err != nil { return newSystemErrorWithCause(err, "running premount command") } } if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil { return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination) } for _, postcmd := range m.PostmountCmds { if err := mountCmd(postcmd); err != nil { return newSystemErrorWithCause(err, "running postmount command") } } } if setupDev { if err := createDevices(config); err != nil { return newSystemErrorWithCause(err, "creating device nodes") } if err := setupPtmx(config, console); err != nil { return newSystemErrorWithCause(err, "setting up ptmx") } if err := setupDevSymlinks(config.Rootfs); err != nil { return newSystemErrorWithCause(err, "setting up /dev symlinks") } } // Signal the parent to run the pre-start hooks. 
// The hooks are run after the mounts are setup, but before we switch to the new // root, so that the old root is still available in the hooks for any mount // manipulations. if err := syncParentHooks(pipe); err != nil { return err } if err := syscall.Chdir(config.Rootfs); err != nil { return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs) } if config.NoPivotRoot { err = msMoveRoot(config.Rootfs) } else { err = pivotRoot(config.Rootfs) } if err != nil { return newSystemErrorWithCause(err, "jailing process inside rootfs") } if setupDev { if err := reOpenDevNull(); err != nil { return newSystemErrorWithCause(err, "reopening /dev/null inside container") } } // remount dev as ro if specified for _, m := range config.Mounts { if libcontainerUtils.CleanPath(m.Destination) == "/dev" { if m.Flags&syscall.MS_RDONLY != 0 { if err := remountReadonly(m.Destination); err != nil { return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination) } } break } } // set rootfs ( / ) as readonly if config.Readonlyfs { if err := setReadonly(); err != nil { return newSystemErrorWithCause(err, "setting rootfs as readonly") } } syscall.Umask(0022) return nil } func mountCmd(cmd configs.Command) error { command := exec.Command(cmd.Path, cmd.Args[:]...) 
command.Env = cmd.Env command.Dir = cmd.Dir if out, err := command.CombinedOutput(); err != nil { return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err) } return nil } func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { var ( dest = m.Destination ) if !strings.HasPrefix(dest, rootfs) { dest = filepath.Join(rootfs, dest) } switch m.Device { case "proc", "sysfs": if err := os.MkdirAll(dest, 0755); err != nil { return err } // Selinux kernels do not support labeling of /proc or /sys return mountPropagate(m, rootfs, "") case "mqueue": if err := os.MkdirAll(dest, 0755); err != nil { return err } if err := mountPropagate(m, rootfs, mountLabel); err != nil { // older kernels do not support labeling of /dev/mqueue if err := mountPropagate(m, rootfs, ""); err != nil { return err } return label.SetFileLabel(dest, mountLabel) } return nil case "tmpfs": copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP tmpDir := "" stat, err := os.Stat(dest) if err != nil { if err := os.MkdirAll(dest, 0755); err != nil { return err } } if copyUp { tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir") if err != nil { return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir") } defer os.RemoveAll(tmpDir) m.Destination = tmpDir } if err := mountPropagate(m, rootfs, mountLabel); err != nil { return err } if copyUp { if err := fileutils.CopyDirectory(dest, tmpDir); err != nil { errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err) if err1 := syscall.Unmount(tmpDir, syscall.MNT_DETACH); err1 != nil { return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) } return errMsg } if err := syscall.Mount(tmpDir, dest, "", syscall.MS_MOVE, ""); err != nil { errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err) if err1 := syscall.Unmount(tmpDir, syscall.MNT_DETACH); err1 != nil { return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) } return 
errMsg } } if stat != nil { if err = os.Chmod(dest, stat.Mode()); err != nil { return err } } return nil case "bind": stat, err := os.Stat(m.Source) if err != nil { // error out if the source of a bind mount does not exist as we will be // unable to bind anything to it. return err } // ensure that the destination of the bind mount is resolved of symlinks at mount time because // any previous mounts can invalidate the next mount's destination. // this can happen when a user specifies mounts within other mounts to cause breakouts or other // evil stuff to try to escape the container's rootfs. if dest, err = symlink.FollowSymlinkInScope(filepath.Join(rootfs, m.Destination), rootfs); err != nil { return err } if err := checkMountDestination(rootfs, dest); err != nil { return err } // update the mount with the correct dest after symlinks are resolved. m.Destination = dest if err := createIfNotExists(dest, stat.IsDir()); err != nil { return err } if err := mountPropagate(m, rootfs, mountLabel); err != nil { return err } // bind mount won't change mount options, we need remount to make mount options effective. 
// first check that we have non-default options required before attempting a remount if m.Flags&^(syscall.MS_REC|syscall.MS_REMOUNT|syscall.MS_BIND) != 0 { // only remount if unique mount options are set if err := remount(m, rootfs); err != nil { return err } } if m.Relabel != "" { if err := label.Validate(m.Relabel); err != nil { return err } shared := label.IsShared(m.Relabel) if err := label.Relabel(m.Source, mountLabel, shared); err != nil { return err } } case "cgroup": binds, err := getCgroupMounts(m) if err != nil { return err } var merged []string for _, b := range binds { ss := filepath.Base(b.Destination) if strings.Contains(ss, ",") { merged = append(merged, ss) } } tmpfs := &configs.Mount{ Source: "tmpfs", Device: "tmpfs", Destination: m.Destination, Flags: defaultMountFlags, Data: "mode=755", PropagationFlags: m.PropagationFlags, } if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil { return err } for _, b := range binds { if err := mountToRootfs(b, rootfs, mountLabel); err != nil { return err } } for _, mc := range merged { for _, ss := range strings.Split(mc, ",") { // symlink(2) is very dumb, it will just shove the path into // the link and doesn't do any checks or relative path // conversion. Also, don't error out if the cgroup already exists. 
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) { return err } } } if m.Flags&syscall.MS_RDONLY != 0 { // remount cgroup root as readonly mcgrouproot := &configs.Mount{ Source: m.Destination, Device: "bind", Destination: m.Destination, Flags: defaultMountFlags | syscall.MS_RDONLY | syscall.MS_BIND, } if err := remount(mcgrouproot, rootfs); err != nil { return err } } default: if err := os.MkdirAll(dest, 0755); err != nil { return err } return mountPropagate(m, rootfs, mountLabel) } return nil } func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { mounts, err := cgroups.GetCgroupMounts(false) if err != nil { return nil, err } cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup") if err != nil { return nil, err } var binds []*configs.Mount for _, mm := range mounts { dir, err := mm.GetThisCgroupDir(cgroupPaths) if err != nil { return nil, err } relDir, err := filepath.Rel(mm.Root, dir) if err != nil { return nil, err } binds = append(binds, &configs.Mount{ Device: "bind", Source: filepath.Join(mm.Mountpoint, relDir), Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")), Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags, PropagationFlags: m.PropagationFlags, }) } return binds, nil } // checkMountDestination checks to ensure that the mount destination is not over the top of /proc. // dest is required to be an abs path and have any symlinks resolved before calling this function. func checkMountDestination(rootfs, dest string) error { invalidDestinations := []string{ "/proc", } // White list, it should be sub directories of invalid destinations validDestinations := []string{ // These entries can be bind mounted by files emulated by fuse, // so commands like top, free displays stats in container. 
"/proc/cpuinfo", "/proc/diskstats", "/proc/meminfo", "/proc/stat", "/proc/swaps", "/proc/uptime", "/proc/net/dev", } for _, valid := range validDestinations { path, err := filepath.Rel(filepath.Join(rootfs, valid), dest) if err != nil { return err } if path == "." { return nil } } for _, invalid := range invalidDestinations { path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest) if err != nil { return err } if path == "." || !strings.HasPrefix(path, "..") { return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid) } } return nil } func setupDevSymlinks(rootfs string) error { var links = [][2]string{ {"/proc/self/fd", "/dev/fd"}, {"/proc/self/fd/0", "/dev/stdin"}, {"/proc/self/fd/1", "/dev/stdout"}, {"/proc/self/fd/2", "/dev/stderr"}, } // kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink // in /dev if it exists in /proc. if _, err := os.Stat("/proc/kcore"); err == nil { links = append(links, [2]string{"/proc/kcore", "/dev/core"}) } for _, link := range links { var ( src = link[0] dst = filepath.Join(rootfs, link[1]) ) if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) { return fmt.Errorf("symlink %s %s %s", src, dst, err) } } return nil } // If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs // this method will make them point to `/dev/null` in this container's rootfs. This // needs to be called after we chroot/pivot into the container's rootfs so that any // symlinks are resolved locally. func reOpenDevNull() error { var stat, devNullStat syscall.Stat_t file, err := os.OpenFile("/dev/null", os.O_RDWR, 0) if err != nil { return fmt.Errorf("Failed to open /dev/null - %s", err) } defer file.Close() if err := syscall.Fstat(int(file.Fd()), &devNullStat); err != nil { return err } for fd := 0; fd < 3; fd++ { if err := syscall.Fstat(fd, &stat); err != nil { return err } if stat.Rdev == devNullStat.Rdev { // Close and re-open the fd. 
if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil { return err } } } return nil } // Create the device nodes in the container. func createDevices(config *configs.Config) error { useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER) oldMask := syscall.Umask(0000) for _, node := range config.Devices { // containers running in a user namespace are not allowed to mknod // devices so we can just bind mount it from the host. if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil { syscall.Umask(oldMask) return err } } syscall.Umask(oldMask) return nil } func bindMountDeviceNode(dest string, node *configs.Device) error { f, err := os.Create(dest) if err != nil && !os.IsExist(err) { return err } if f != nil { f.Close() } return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "") } // Creates the device node in the rootfs of the container. func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { dest := filepath.Join(rootfs, node.Path) if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { return err } if bind { return bindMountDeviceNode(dest, node) } if err := mknodDevice(dest, node); err != nil { if os.IsExist(err) { return nil } else if os.IsPermission(err) { return bindMountDeviceNode(dest, node) } return err } return nil } func mknodDevice(dest string, node *configs.Device) error { fileMode := node.FileMode switch node.Type { case 'c': fileMode |= syscall.S_IFCHR case 'b': fileMode |= syscall.S_IFBLK default: return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) } if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil { return err } return syscall.Chown(dest, int(node.Uid), int(node.Gid)) } func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { for _, m := range mountinfo { if m.Mountpoint == dir { return m } } return nil } // Get the parent mount point of directory passed in as argument. 
Also return // optional fields. func getParentMount(rootfs string) (string, string, error) { var path string mountinfos, err := mount.GetMounts() if err != nil { return "", "", err } mountinfo := getMountInfo(mountinfos, rootfs) if mountinfo != nil { return rootfs, mountinfo.Optional, nil } path = rootfs for { path = filepath.Dir(path) mountinfo = getMountInfo(mountinfos, path) if mountinfo != nil { return path, mountinfo.Optional, nil } if path == "/" { break } } // If we are here, we did not find parent mount. Something is wrong. return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs) } // Make parent mount private if it was shared func rootfsParentMountPrivate(rootfs string) error { sharedMount := false parentMount, optionalOpts, err := getParentMount(rootfs) if err != nil { return err } optsSplit := strings.Split(optionalOpts, " ") for _, opt := range optsSplit { if strings.HasPrefix(opt, "shared:") { sharedMount = true break } } // Make parent mount PRIVATE if it was shared. It is needed for two // reasons. First of all pivot_root() will fail if parent mount is // shared. Secondly when we bind mount rootfs it will propagate to // parent namespace and we don't want that to happen. if sharedMount { return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "") } return nil } func prepareRoot(config *configs.Config) error { flag := syscall.MS_SLAVE | syscall.MS_REC if config.RootPropagation != 0 { flag = config.RootPropagation } if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil { return err } // Make parent mount private to make sure following bind mount does // not propagate in other namespaces. Also it will help with kernel // check pass in pivot_root. 
(IS_SHARED(new_mnt->mnt_parent)) if err := rootfsParentMountPrivate(config.Rootfs); err != nil { return err } return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "") } func setReadonly() error { return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") } func setupPtmx(config *configs.Config, console *linuxConsole) error { ptmx := filepath.Join(config.Rootfs, "dev/ptmx") if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err } if err := os.Symlink("pts/ptmx", ptmx); err != nil { return fmt.Errorf("symlink dev ptmx %s", err) } if console != nil { return console.mount(config.Rootfs, config.MountLabel) } return nil } // pivotRoot will call pivot_root such that rootfs becomes the new root // filesystem, and everything else is cleaned up. func pivotRoot(rootfs string) error { // While the documentation may claim otherwise, pivot_root(".", ".") is // actually valid. What this results in is / being the new root but // /proc/self/cwd being the old root. Since we can play around with the cwd // with pivot_root this allows us to pivot without creating directories in // the rootfs. Shout-outs to the LXC developers for giving us this idea. oldroot, err := syscall.Open("/", syscall.O_DIRECTORY|syscall.O_RDONLY, 0) if err != nil { return err } defer syscall.Close(oldroot) newroot, err := syscall.Open(rootfs, syscall.O_DIRECTORY|syscall.O_RDONLY, 0) if err != nil { return err } defer syscall.Close(newroot) // Change to the new root so that the pivot_root actually acts on it. if err := syscall.Fchdir(newroot); err != nil { return err } if err := syscall.PivotRoot(".", "."); err != nil { return fmt.Errorf("pivot_root %s", err) } // Currently our "." is oldroot (according to the current kernel code). // However, purely for safety, we will fchdir(oldroot) since there isn't // really any guarantee from the kernel what /proc/self/cwd will be after a // pivot_root(2). 
if err := syscall.Fchdir(oldroot); err != nil { return err } // Make oldroot rprivate to make sure our unmounts don't propagate to the // host (and thus bork the machine). if err := syscall.Mount("", ".", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil { return err } // Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil { return err } // Switch back to our shiny new root. if err := syscall.Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } return nil } func msMoveRoot(rootfs string) error { if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { return err } if err := syscall.Chroot("."); err != nil { return err } return syscall.Chdir("/") } // createIfNotExists creates a file or a directory only if it does not already exist. func createIfNotExists(path string, isDir bool) error { if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { if isDir { return os.MkdirAll(path, 0755) } if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { return err } f, err := os.OpenFile(path, os.O_CREATE, 0755) if err != nil { return err } f.Close() } } return nil } // remountReadonly will bind over the top of an existing path and ensure that it is read-only. 
func remountReadonly(path string) error { for i := 0; i < 5; i++ { if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) { switch err { case syscall.EINVAL: // Probably not a mountpoint, use bind-mount if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil { return err } return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "") case syscall.EBUSY: time.Sleep(100 * time.Millisecond) continue default: return err } } return nil } return fmt.Errorf("unable to mount %s as readonly max retries reached", path) } // maskPath masks the top of the specified path inside a container to avoid // security issues from processes reading information from non-namespace aware // mounts ( proc/kcore ). // For files, maskPath bind mounts /dev/null over the top of the specified path. // For directories, maskPath mounts read-only tmpfs over the top of the specified path. func maskPath(path string) error { if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { if err == syscall.ENOTDIR { return syscall.Mount("tmpfs", path, "tmpfs", syscall.MS_RDONLY, "") } return err } return nil } // writeSystemProperty writes the value to a path under /proc/sys as determined from the key. // For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward. 
func writeSystemProperty(key, value string) error { keyPath := strings.Replace(key, ".", "/", -1) return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644) } func remount(m *configs.Mount, rootfs string) error { var ( dest = m.Destination ) if !strings.HasPrefix(dest, rootfs) { dest = filepath.Join(rootfs, dest) } if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil { return err } return nil } // Do the mount operation followed by additional mounts required to take care // of propagation flags. func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error { var ( dest = m.Destination data = label.FormatMountLabel(m.Data, mountLabel) flags = m.Flags ) if libcontainerUtils.CleanPath(dest) == "/dev" { flags &= ^syscall.MS_RDONLY } copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP if !(copyUp || strings.HasPrefix(dest, rootfs)) { dest = filepath.Join(rootfs, dest) } if err := syscall.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil { return err } for _, pflag := range m.PropagationFlags { if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil { return err } } return nil } docker-runc-tags-docker-1.13.1/libcontainer/rootfs_linux_test.go000066400000000000000000000035261304443252500250140ustar00rootroot00000000000000// +build linux package libcontainer import ( "testing" "github.com/opencontainers/runc/libcontainer/configs" ) func TestCheckMountDestOnProc(t *testing.T) { dest := "/rootfs/proc/" err := checkMountDestination("/rootfs", dest) if err == nil { t.Fatal("destination inside proc should return an error") } } func TestCheckMountDestInSys(t *testing.T) { dest := "/rootfs//sys/fs/cgroup" err := checkMountDestination("/rootfs", dest) if err != nil { t.Fatal("destination inside /sys should not return an error") } } func TestCheckMountDestFalsePositive(t *testing.T) { dest := "/rootfs/sysfiles/fs/cgroup" err := 
checkMountDestination("/rootfs", dest) if err != nil { t.Fatal(err) } } func TestNeedsSetupDev(t *testing.T) { config := &configs.Config{ Mounts: []*configs.Mount{ { Device: "bind", Source: "/dev", Destination: "/dev", }, }, } if needsSetupDev(config) { t.Fatal("expected needsSetupDev to be false, got true") } } func TestNeedsSetupDevStrangeSource(t *testing.T) { config := &configs.Config{ Mounts: []*configs.Mount{ { Device: "bind", Source: "/devx", Destination: "/dev", }, }, } if needsSetupDev(config) { t.Fatal("expected needsSetupDev to be false, got true") } } func TestNeedsSetupDevStrangeDest(t *testing.T) { config := &configs.Config{ Mounts: []*configs.Mount{ { Device: "bind", Source: "/dev", Destination: "/devx", }, }, } if !needsSetupDev(config) { t.Fatal("expected needsSetupDev to be true, got false") } } func TestNeedsSetupDevStrangeSourceDest(t *testing.T) { config := &configs.Config{ Mounts: []*configs.Mount{ { Device: "bind", Source: "/devx", Destination: "/devx", }, }, } if !needsSetupDev(config) { t.Fatal("expected needsSetupDev to be true, got false") } } docker-runc-tags-docker-1.13.1/libcontainer/seccomp/000077500000000000000000000000001304443252500223165ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/seccomp/config.go000066400000000000000000000047371304443252500241250ustar00rootroot00000000000000package seccomp import ( "fmt" "github.com/opencontainers/runc/libcontainer/configs" ) var operators = map[string]configs.Operator{ "SCMP_CMP_NE": configs.NotEqualTo, "SCMP_CMP_LT": configs.LessThan, "SCMP_CMP_LE": configs.LessThanOrEqualTo, "SCMP_CMP_EQ": configs.EqualTo, "SCMP_CMP_GE": configs.GreaterThanOrEqualTo, "SCMP_CMP_GT": configs.GreaterThan, "SCMP_CMP_MASKED_EQ": configs.MaskEqualTo, } var actions = map[string]configs.Action{ "SCMP_ACT_KILL": configs.Kill, "SCMP_ACT_ERRNO": configs.Errno, "SCMP_ACT_TRAP": configs.Trap, "SCMP_ACT_ALLOW": configs.Allow, "SCMP_ACT_TRACE": configs.Trace, } var archs = map[string]string{ 
"SCMP_ARCH_X86": "x86", "SCMP_ARCH_X86_64": "amd64", "SCMP_ARCH_X32": "x32", "SCMP_ARCH_ARM": "arm", "SCMP_ARCH_AARCH64": "arm64", "SCMP_ARCH_MIPS": "mips", "SCMP_ARCH_MIPS64": "mips64", "SCMP_ARCH_MIPS64N32": "mips64n32", "SCMP_ARCH_MIPSEL": "mipsel", "SCMP_ARCH_MIPSEL64": "mipsel64", "SCMP_ARCH_MIPSEL64N32": "mipsel64n32", "SCMP_ARCH_PPC": "ppc", "SCMP_ARCH_PPC64": "ppc64", "SCMP_ARCH_PPC64LE": "ppc64le", "SCMP_ARCH_S390": "s390", "SCMP_ARCH_S390X": "s390x", } // ConvertStringToOperator converts a string into a Seccomp comparison operator. // Comparison operators use the names they are assigned by Libseccomp's header. // Attempting to convert a string that is not a valid operator results in an // error. func ConvertStringToOperator(in string) (configs.Operator, error) { if op, ok := operators[in]; ok == true { return op, nil } return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in) } // ConvertStringToAction converts a string into a Seccomp rule match action. // Actions use the names they are assigned in Libseccomp's header, though some // (notable, SCMP_ACT_TRACE) are not available in this implementation and will // return errors. // Attempting to convert a string that is not a valid action results in an // error. func ConvertStringToAction(in string) (configs.Action, error) { if act, ok := actions[in]; ok == true { return act, nil } return 0, fmt.Errorf("string %s is not a valid action for seccomp", in) } // ConvertStringToArch converts a string into a Seccomp comparison arch. 
func ConvertStringToArch(in string) (string, error) { if arch, ok := archs[in]; ok == true { return arch, nil } return "", fmt.Errorf("string %s is not a valid arch for seccomp", in) } docker-runc-tags-docker-1.13.1/libcontainer/seccomp/fixtures/000077500000000000000000000000001304443252500241675ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/seccomp/fixtures/proc_self_status000066400000000000000000000017571304443252500275030ustar00rootroot00000000000000Name: cat State: R (running) Tgid: 19383 Ngid: 0 Pid: 19383 PPid: 19275 TracerPid: 0 Uid: 1000 1000 1000 1000 Gid: 1000 1000 1000 1000 FDSize: 256 Groups: 24 25 27 29 30 44 46 102 104 108 111 1000 1001 NStgid: 19383 NSpid: 19383 NSpgid: 19383 NSsid: 19275 VmPeak: 5944 kB VmSize: 5944 kB VmLck: 0 kB VmPin: 0 kB VmHWM: 744 kB VmRSS: 744 kB VmData: 324 kB VmStk: 136 kB VmExe: 48 kB VmLib: 1776 kB VmPTE: 32 kB VmPMD: 12 kB VmSwap: 0 kB Threads: 1 SigQ: 0/30067 SigPnd: 0000000000000000 ShdPnd: 0000000000000000 SigBlk: 0000000000000000 SigIgn: 0000000000000080 SigCgt: 0000000000000000 CapInh: 0000000000000000 CapPrm: 0000000000000000 CapEff: 0000000000000000 CapBnd: 0000003fffffffff CapAmb: 0000000000000000 Seccomp: 0 Cpus_allowed: f Cpus_allowed_list: 0-3 Mems_allowed: 00000000,00000001 Mems_allowed_list: 0 voluntary_ctxt_switches: 0 nonvoluntary_ctxt_switches: 1 docker-runc-tags-docker-1.13.1/libcontainer/seccomp/seccomp_linux.go000066400000000000000000000133671304443252500255270ustar00rootroot00000000000000// +build linux,cgo,seccomp package seccomp import ( "bufio" "fmt" "os" "strings" "syscall" "github.com/opencontainers/runc/libcontainer/configs" libseccomp "github.com/seccomp/libseccomp-golang" ) var ( actAllow = libseccomp.ActAllow actTrap = libseccomp.ActTrap actKill = libseccomp.ActKill actTrace = libseccomp.ActTrace.SetReturnCode(int16(syscall.EPERM)) actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM)) // SeccompModeFilter refers to the syscall argument 
SECCOMP_MODE_FILTER. SeccompModeFilter = uintptr(2) ) // Filters given syscalls in a container, preventing them from being used // Started in the container init process, and carried over to all child processes // Setns calls, however, require a separate invocation, as they are not children // of the init until they join the namespace func InitSeccomp(config *configs.Seccomp) error { if config == nil { return fmt.Errorf("cannot initialize Seccomp - nil config passed") } defaultAction, err := getAction(config.DefaultAction) if err != nil { return fmt.Errorf("error initializing seccomp - invalid default action") } filter, err := libseccomp.NewFilter(defaultAction) if err != nil { return fmt.Errorf("error creating filter: %s", err) } // Add extra architectures for _, arch := range config.Architectures { scmpArch, err := libseccomp.GetArchFromString(arch) if err != nil { return err } if err := filter.AddArch(scmpArch); err != nil { return err } } // Unset no new privs bit if err := filter.SetNoNewPrivsBit(false); err != nil { return fmt.Errorf("error setting no new privileges: %s", err) } // Add a rule for each syscall for _, call := range config.Syscalls { if call == nil { return fmt.Errorf("encountered nil syscall while initializing Seccomp") } if err = matchCall(filter, call); err != nil { return err } } if err = filter.Load(); err != nil { return fmt.Errorf("error loading seccomp filter into kernel: %s", err) } return nil } // IsEnabled returns if the kernel has been configured to support seccomp. func IsEnabled() bool { // Try to read from /proc/self/status for kernels > 3.8 s, err := parseStatusFile("/proc/self/status") if err != nil { // Check if Seccomp is supported, via CONFIG_SECCOMP. if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL { // Make sure the kernel has CONFIG_SECCOMP_FILTER. 
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL { return true } } return false } _, ok := s["Seccomp"] return ok } // Convert Libcontainer Action to Libseccomp ScmpAction func getAction(act configs.Action) (libseccomp.ScmpAction, error) { switch act { case configs.Kill: return actKill, nil case configs.Errno: return actErrno, nil case configs.Trap: return actTrap, nil case configs.Allow: return actAllow, nil case configs.Trace: return actTrace, nil default: return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule") } } // Convert Libcontainer Operator to Libseccomp ScmpCompareOp func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { switch op { case configs.EqualTo: return libseccomp.CompareEqual, nil case configs.NotEqualTo: return libseccomp.CompareNotEqual, nil case configs.GreaterThan: return libseccomp.CompareGreater, nil case configs.GreaterThanOrEqualTo: return libseccomp.CompareGreaterEqual, nil case configs.LessThan: return libseccomp.CompareLess, nil case configs.LessThanOrEqualTo: return libseccomp.CompareLessOrEqual, nil case configs.MaskEqualTo: return libseccomp.CompareMaskedEqual, nil default: return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule") } } // Convert Libcontainer Arg to Libseccomp ScmpCondition func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { cond := libseccomp.ScmpCondition{} if arg == nil { return cond, fmt.Errorf("cannot convert nil to syscall condition") } op, err := getOperator(arg.Op) if err != nil { return cond, err } return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) } // Add a rule to match a single syscall func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { if call == nil || filter == nil { return fmt.Errorf("cannot use nil as syscall to block") } if len(call.Name) == 0 { return fmt.Errorf("empty string is not a valid 
syscall") } // If we can't resolve the syscall, assume it's not supported on this kernel // Ignore it, don't error out callNum, err := libseccomp.GetSyscallFromName(call.Name) if err != nil { return nil } // Convert the call's action to the libseccomp equivalent callAct, err := getAction(call.Action) if err != nil { return err } // Unconditional match - just add the rule if len(call.Args) == 0 { if err = filter.AddRule(callNum, callAct); err != nil { return err } } else { // Conditional match - convert the per-arg rules into library format conditions := []libseccomp.ScmpCondition{} for _, cond := range call.Args { newCond, err := getCondition(cond) if err != nil { return err } conditions = append(conditions, newCond) } if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil { return err } } return nil } func parseStatusFile(path string) (map[string]string, error) { f, err := os.Open(path) if err != nil { return nil, err } defer f.Close() s := bufio.NewScanner(f) status := make(map[string]string) for s.Scan() { if err := s.Err(); err != nil { return nil, err } text := s.Text() parts := strings.Split(text, ":") if len(parts) <= 1 { continue } status[parts[0]] = parts[1] } return status, nil } docker-runc-tags-docker-1.13.1/libcontainer/seccomp/seccomp_linux_test.go000066400000000000000000000004541304443252500265570ustar00rootroot00000000000000// +build linux,cgo,seccomp package seccomp import "testing" func TestParseStatusFile(t *testing.T) { s, err := parseStatusFile("fixtures/proc_self_status") if err != nil { t.Fatal(err) } if _, ok := s["Seccomp"]; !ok { t.Fatal("expected to find 'Seccomp' in the map but did not.") } } docker-runc-tags-docker-1.13.1/libcontainer/seccomp/seccomp_unsupported.go000066400000000000000000000007611304443252500267520ustar00rootroot00000000000000// +build !linux !cgo !seccomp package seccomp import ( "errors" "github.com/opencontainers/runc/libcontainer/configs" ) var ErrSeccompNotEnabled = errors.New("seccomp: config 
provided but seccomp not supported") // InitSeccomp does nothing because seccomp is not supported. func InitSeccomp(config *configs.Seccomp) error { if config != nil { return ErrSeccompNotEnabled } return nil } // IsEnabled returns false, because it is not supported. func IsEnabled() bool { return false } docker-runc-tags-docker-1.13.1/libcontainer/selinux/000077500000000000000000000000001304443252500223545ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/selinux/selinux.go000066400000000000000000000254741304443252500244060ustar00rootroot00000000000000// +build linux package selinux import ( "bufio" "crypto/rand" "encoding/binary" "fmt" "io" "os" "path/filepath" "regexp" "strconv" "strings" "sync" "syscall" "github.com/opencontainers/runc/libcontainer/system" ) const ( Enforcing = 1 Permissive = 0 Disabled = -1 selinuxDir = "/etc/selinux/" selinuxConfig = selinuxDir + "config" selinuxTypeTag = "SELINUXTYPE" selinuxTag = "SELINUX" selinuxPath = "/sys/fs/selinux" xattrNameSelinux = "security.selinux" stRdOnly = 0x01 ) var ( assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`) mcsList = make(map[string]bool) mcsLock sync.Mutex selinuxfs = "unknown" selinuxEnabled = false // Stores whether selinux is currently enabled selinuxEnabledChecked = false // Stores whether selinux enablement has been checked or established yet ) type SELinuxContext map[string]string // SetDisabled disables selinux support for the package func SetDisabled() { selinuxEnabled, selinuxEnabledChecked = false, true } // getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs // filesystem or an empty string if no mountpoint is found. Selinuxfs is // a proc-like pseudo-filesystem that exposes the selinux policy API to // processes. The existence of an selinuxfs mount is used to determine // whether selinux is currently enabled or not. 
func getSelinuxMountPoint() string { if selinuxfs != "unknown" { return selinuxfs } selinuxfs = "" f, err := os.Open("/proc/self/mountinfo") if err != nil { return selinuxfs } defer f.Close() scanner := bufio.NewScanner(f) for scanner.Scan() { txt := scanner.Text() // Safe as mountinfo encodes mountpoints with spaces as \040. sepIdx := strings.Index(txt, " - ") if sepIdx == -1 { continue } if !strings.Contains(txt[sepIdx:], "selinuxfs") { continue } fields := strings.Split(txt, " ") if len(fields) < 5 { continue } selinuxfs = fields[4] break } if selinuxfs != "" { var buf syscall.Statfs_t syscall.Statfs(selinuxfs, &buf) if (buf.Flags & stRdOnly) == 1 { selinuxfs = "" } } return selinuxfs } // SelinuxEnabled returns whether selinux is currently enabled. func SelinuxEnabled() bool { if selinuxEnabledChecked { return selinuxEnabled } selinuxEnabledChecked = true if fs := getSelinuxMountPoint(); fs != "" { if con, _ := Getcon(); con != "kernel" { selinuxEnabled = true } } return selinuxEnabled } func readConfig(target string) (value string) { var ( val, key string bufin *bufio.Reader ) in, err := os.Open(selinuxConfig) if err != nil { return "" } defer in.Close() bufin = bufio.NewReader(in) for done := false; !done; { var line string if line, err = bufin.ReadString('\n'); err != nil { if err != io.EOF { return "" } done = true } line = strings.TrimSpace(line) if len(line) == 0 { // Skip blank lines continue } if line[0] == ';' || line[0] == '#' { // Skip comments continue } if groups := assignRegex.FindStringSubmatch(line); groups != nil { key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) if key == target { return strings.Trim(val, "\"") } } } return "" } func getSELinuxPolicyRoot() string { return selinuxDir + readConfig(selinuxTypeTag) } func readCon(name string) (string, error) { var val string in, err := os.Open(name) if err != nil { return "", err } defer in.Close() _, err = fmt.Fscanf(in, "%s", &val) return val, err } // Setfilecon sets the 
SELinux label for this path or returns an error. func Setfilecon(path string, scon string) error { return system.Lsetxattr(path, xattrNameSelinux, []byte(scon), 0) } // Getfilecon returns the SELinux label for this path or returns an error. func Getfilecon(path string) (string, error) { con, err := system.Lgetxattr(path, xattrNameSelinux) if err != nil { return "", err } // Trim the NUL byte at the end of the byte buffer, if present. if len(con) > 0 && con[len(con)-1] == '\x00' { con = con[:len(con)-1] } return string(con), nil } func Setfscreatecon(scon string) error { return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), scon) } func Getfscreatecon() (string, error) { return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid())) } // Getcon returns the SELinux label of the current process thread, or an error. func Getcon() (string, error) { return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid())) } // Getpidcon returns the SELinux label of the given pid, or an error. 
func Getpidcon(pid int) (string, error) { return readCon(fmt.Sprintf("/proc/%d/attr/current", pid)) } func Getexeccon() (string, error) { return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid())) } func writeCon(name string, val string) error { out, err := os.OpenFile(name, os.O_WRONLY, 0) if err != nil { return err } defer out.Close() if val != "" { _, err = out.Write([]byte(val)) } else { _, err = out.Write(nil) } return err } func Setexeccon(scon string) error { return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), scon) } func (c SELinuxContext) Get() string { return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"]) } func NewContext(scon string) SELinuxContext { c := make(SELinuxContext) if len(scon) != 0 { con := strings.SplitN(scon, ":", 4) c["user"] = con[0] c["role"] = con[1] c["type"] = con[2] c["level"] = con[3] } return c } func ReserveLabel(scon string) { if len(scon) != 0 { con := strings.SplitN(scon, ":", 4) mcsAdd(con[3]) } } func selinuxEnforcePath() string { return fmt.Sprintf("%s/enforce", selinuxPath) } func SelinuxGetEnforce() int { var enforce int enforceS, err := readCon(selinuxEnforcePath()) if err != nil { return -1 } enforce, err = strconv.Atoi(string(enforceS)) if err != nil { return -1 } return enforce } func SelinuxSetEnforce(mode int) error { return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode)) } func SelinuxGetEnforceMode() int { switch readConfig(selinuxTag) { case "enforcing": return Enforcing case "permissive": return Permissive } return Disabled } func mcsAdd(mcs string) error { mcsLock.Lock() defer mcsLock.Unlock() if mcsList[mcs] { return fmt.Errorf("MCS Label already exists") } mcsList[mcs] = true return nil } func mcsDelete(mcs string) { mcsLock.Lock() mcsList[mcs] = false mcsLock.Unlock() } func IntToMcs(id int, catRange uint32) string { var ( SETSIZE = int(catRange) TIER = SETSIZE ORD = id ) if id < 1 || id > 523776 { return "" } for ORD > TIER { 
ORD = ORD - TIER TIER-- } TIER = SETSIZE - TIER ORD = ORD + TIER return fmt.Sprintf("s0:c%d,c%d", TIER, ORD) } func uniqMcs(catRange uint32) string { var ( n uint32 c1, c2 uint32 mcs string ) for { binary.Read(rand.Reader, binary.LittleEndian, &n) c1 = n % catRange binary.Read(rand.Reader, binary.LittleEndian, &n) c2 = n % catRange if c1 == c2 { continue } else { if c1 > c2 { t := c1 c1 = c2 c2 = t } } mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2) if err := mcsAdd(mcs); err != nil { continue } break } return mcs } func FreeLxcContexts(scon string) { if len(scon) != 0 { con := strings.SplitN(scon, ":", 4) mcsDelete(con[3]) } } var roFileLabel string func GetROFileLabel() (fileLabel string) { return roFileLabel } func GetLxcContexts() (processLabel string, fileLabel string) { var ( val, key string bufin *bufio.Reader ) if !SelinuxEnabled() { return "", "" } lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot()) in, err := os.Open(lxcPath) if err != nil { return "", "" } defer in.Close() bufin = bufio.NewReader(in) for done := false; !done; { var line string if line, err = bufin.ReadString('\n'); err != nil { if err == io.EOF { done = true } else { goto exit } } line = strings.TrimSpace(line) if len(line) == 0 { // Skip blank lines continue } if line[0] == ';' || line[0] == '#' { // Skip comments continue } if groups := assignRegex.FindStringSubmatch(line); groups != nil { key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) if key == "process" { processLabel = strings.Trim(val, "\"") } if key == "file" { fileLabel = strings.Trim(val, "\"") } if key == "ro_file" { roFileLabel = strings.Trim(val, "\"") } } } if processLabel == "" || fileLabel == "" { return "", "" } if roFileLabel == "" { roFileLabel = fileLabel } exit: // mcs := IntToMcs(os.Getpid(), 1024) mcs := uniqMcs(1024) scon := NewContext(processLabel) scon["level"] = mcs processLabel = scon.Get() scon = NewContext(fileLabel) scon["level"] = mcs fileLabel = scon.Get() return 
// badPrefix reports (as a non-nil error) an attempt to relabel content
// under a protected system prefix; it returns nil for all other paths.
func badPrefix(fpath string) error {
	protected := []string{"/usr"}
	for _, prefix := range protected {
		// Reject the prefix itself and anything strictly below it, but not
		// unrelated siblings such as "/usrlocal".
		if fpath != prefix && !strings.HasPrefix(fpath, prefix+"/") {
			continue
		}
		return fmt.Errorf("Relabeling content in %s is not allowed.", prefix)
	}
	return nil
}
docker-runc-tags-docker-1.13.1/libcontainer/selinux/selinux_test.go000066400000000000000000000037241304443252500254370ustar00rootroot00000000000000// +build linux,selinux package selinux_test import ( "os" "testing" "github.com/opencontainers/runc/libcontainer/selinux" ) func TestSetfilecon(t *testing.T) { if selinux.SelinuxEnabled() { tmp := "selinux_test" con := "system_u:object_r:bin_t:s0" out, _ := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE, 0) out.Close() err := selinux.Setfilecon(tmp, con) if err != nil { t.Log("Setfilecon failed") t.Fatal(err) } filecon, err := selinux.Getfilecon(tmp) if err != nil { t.Log("Getfilecon failed") t.Fatal(err) } if con != filecon { t.Fatal("Getfilecon failed, returned %s expected %s", filecon, con) } os.Remove(tmp) } } func TestSELinux(t *testing.T) { var ( err error plabel, flabel string ) if selinux.SelinuxEnabled() { t.Log("Enabled") plabel, flabel = selinux.GetLxcContexts() t.Log(plabel) t.Log(flabel) selinux.FreeLxcContexts(plabel) plabel, flabel = selinux.GetLxcContexts() t.Log(plabel) t.Log(flabel) selinux.FreeLxcContexts(plabel) t.Log("getenforce ", selinux.SelinuxGetEnforce()) mode := selinux.SelinuxGetEnforceMode() t.Log("getenforcemode ", mode) defer selinux.SelinuxSetEnforce(mode) if err := selinux.SelinuxSetEnforce(selinux.Enforcing); err != nil { t.Fatalf("enforcing selinux failed: %v", err) } if err := selinux.SelinuxSetEnforce(selinux.Permissive); err != nil { t.Fatalf("setting selinux mode to permissive failed: %v", err) } selinux.SelinuxSetEnforce(mode) pid := os.Getpid() t.Logf("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023)) err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0") if err == nil { t.Log(selinux.Getfscreatecon()) } else { t.Log("setfscreatecon failed", err) t.Fatal(err) } err = selinux.Setfscreatecon("") if err == nil { t.Log(selinux.Getfscreatecon()) } else { t.Log("setfscreatecon failed", err) t.Fatal(err) } t.Log(selinux.Getpidcon(1)) } else { t.Log("Disabled") } } 
// enableSetgroups sets the GidMappingsEnableSetgroups member to true, so
// that the process's /proc/<pid>/setgroups entry won't be written as
// "deny" when GidMappings are set on the SysProcAttr.
func enableSetgroups(sys *syscall.SysProcAttr) {
	sys.GidMappingsEnableSetgroups = true
}
// Init performs the container's initialization for running a new process
// inside an existing container (the setns path). It applies per-process
// security settings in a fixed, order-sensitive sequence: session keyring,
// no-new-privs, seccomp, namespace finalization, AppArmor profile, and the
// SELinux process label, before exec'ing the requested binary.
func (l *linuxSetnsInit) Init() error {
	if !l.config.Config.NoNewKeyring {
		// do not inherit the parent's session keyring
		if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
			return err
		}
	}
	if l.config.NoNewPrivileges {
		// Set PR_SET_NO_NEW_PRIVS so the process (and children) cannot gain
		// privileges through execve.
		if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
			return err
		}
	}
	if l.config.Config.Seccomp != nil {
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
			return err
		}
	}
	if err := finalizeNamespace(l.config); err != nil {
		return err
	}
	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
		return err
	}
	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
		return err
	}
	// close the statedir fd before exec because the kernel resets dumpable in the wrong order
	// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
	syscall.Close(l.stateDirFD)
	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}
specs.MountNamespace: configs.NEWNS, specs.UserNamespace: configs.NEWUSER, specs.IPCNamespace: configs.NEWIPC, specs.UTSNamespace: configs.NEWUTS, } var mountPropagationMapping = map[string]int{ "rprivate": syscall.MS_PRIVATE | syscall.MS_REC, "private": syscall.MS_PRIVATE, "rslave": syscall.MS_SLAVE | syscall.MS_REC, "slave": syscall.MS_SLAVE, "rshared": syscall.MS_SHARED | syscall.MS_REC, "shared": syscall.MS_SHARED, "": syscall.MS_PRIVATE | syscall.MS_REC, } var allowedDevices = []*configs.Device{ // allow mknod for any device { Type: 'c', Major: wildcard, Minor: wildcard, Permissions: "m", Allow: true, }, { Type: 'b', Major: wildcard, Minor: wildcard, Permissions: "m", Allow: true, }, { Type: 'c', Path: "/dev/null", Major: 1, Minor: 3, Permissions: "rwm", Allow: true, }, { Type: 'c', Path: "/dev/random", Major: 1, Minor: 8, Permissions: "rwm", Allow: true, }, { Type: 'c', Path: "/dev/full", Major: 1, Minor: 7, Permissions: "rwm", Allow: true, }, { Type: 'c', Path: "/dev/tty", Major: 5, Minor: 0, Permissions: "rwm", Allow: true, }, { Type: 'c', Path: "/dev/zero", Major: 1, Minor: 5, Permissions: "rwm", Allow: true, }, { Type: 'c', Path: "/dev/urandom", Major: 1, Minor: 9, Permissions: "rwm", Allow: true, }, { Path: "/dev/console", Type: 'c', Major: 5, Minor: 1, Permissions: "rwm", Allow: true, }, // /dev/pts/ - pts namespaces are "coming soon" { Path: "", Type: 'c', Major: 136, Minor: wildcard, Permissions: "rwm", Allow: true, }, { Path: "", Type: 'c', Major: 5, Minor: 2, Permissions: "rwm", Allow: true, }, // tuntap { Path: "", Type: 'c', Major: 10, Minor: 200, Permissions: "rwm", Allow: true, }, } type CreateOpts struct { CgroupName string UseSystemdCgroup bool NoPivotRoot bool NoNewKeyring bool Spec *specs.Spec } // CreateLibcontainerConfig creates a new libcontainer configuration from a // given specification and a cgroup name func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { // runc's cwd will always be the bundle path rcwd, err := 
os.Getwd() if err != nil { return nil, err } cwd, err := filepath.Abs(rcwd) if err != nil { return nil, err } spec := opts.Spec rootfsPath := spec.Root.Path if !filepath.IsAbs(rootfsPath) { rootfsPath = filepath.Join(cwd, rootfsPath) } labels := []string{} for k, v := range spec.Annotations { labels = append(labels, fmt.Sprintf("%s=%s", k, v)) } config := &configs.Config{ Rootfs: rootfsPath, NoPivotRoot: opts.NoPivotRoot, Readonlyfs: spec.Root.Readonly, Hostname: spec.Hostname, Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)), NoNewKeyring: opts.NoNewKeyring, } exists := false if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists { return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation) } for _, ns := range spec.Linux.Namespaces { t, exists := namespaceMapping[ns.Type] if !exists { return nil, fmt.Errorf("namespace %q does not exist", ns) } if config.Namespaces.Contains(t) { return nil, fmt.Errorf("malformed spec file: duplicated ns %q", ns) } config.Namespaces.Add(t, ns.Path) } if config.Namespaces.Contains(configs.NEWNET) { config.Networks = []*configs.Network{ { Type: "loopback", }, } } for _, m := range spec.Mounts { config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m)) } if err := createDevices(spec, config); err != nil { return nil, err } if err := setupUserNamespace(spec, config); err != nil { return nil, err } c, err := createCgroupConfig(opts.CgroupName, opts.UseSystemdCgroup, spec) if err != nil { return nil, err } config.Cgroups = c // set extra path masking for libcontainer for the various unsafe places in proc config.MaskPaths = spec.Linux.MaskedPaths config.ReadonlyPaths = spec.Linux.ReadonlyPaths if spec.Linux.Seccomp != nil { seccomp, err := setupSeccomp(spec.Linux.Seccomp) if err != nil { return nil, err } config.Seccomp = seccomp } if spec.Process.SelinuxLabel != "" { config.ProcessLabel = spec.Process.SelinuxLabel } config.Sysctl = 
spec.Linux.Sysctl if spec.Linux.Resources != nil && spec.Linux.Resources.OOMScoreAdj != nil { config.OomScoreAdj = *spec.Linux.Resources.OOMScoreAdj } createHooks(spec, config) config.MountLabel = spec.Linux.MountLabel config.Version = specs.Version return config, nil } func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount { flags, pgflags, data, ext := parseMountOptions(m.Options) source := m.Source if m.Type == "bind" { if !filepath.IsAbs(source) { source = filepath.Join(cwd, m.Source) } } return &configs.Mount{ Device: m.Type, Source: source, Destination: m.Destination, Data: data, Flags: flags, PropagationFlags: pgflags, Extensions: ext, } } func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*configs.Cgroup, error) { var myCgroupPath string c := &configs.Cgroup{ Resources: &configs.Resources{}, } if spec.Linux != nil && spec.Linux.CgroupsPath != nil { myCgroupPath = libcontainerUtils.CleanPath(*spec.Linux.CgroupsPath) if useSystemdCgroup { myCgroupPath = *spec.Linux.CgroupsPath } } if useSystemdCgroup { if myCgroupPath == "" { c.Parent = "system.slice" c.ScopePrefix = "runc" c.Name = name } else { // Parse the path from expected "slice:prefix:name" // for e.g. 
"system.slice:docker:1234" parts := strings.Split(myCgroupPath, ":") if len(parts) != 3 { return nil, fmt.Errorf("expected cgroupsPath to be of format \"slice:prefix:name\" for systemd cgroups") } c.Parent = parts[0] c.ScopePrefix = parts[1] c.Name = parts[2] } } else { if myCgroupPath == "" { c.Name = name } c.Path = myCgroupPath } c.Resources.AllowedDevices = allowedDevices if spec.Linux == nil { return c, nil } r := spec.Linux.Resources if r == nil { return c, nil } for i, d := range spec.Linux.Resources.Devices { var ( t = "a" major = int64(-1) minor = int64(-1) ) if d.Type != nil { t = *d.Type } if d.Major != nil { major = *d.Major } if d.Minor != nil { minor = *d.Minor } if d.Access == nil || *d.Access == "" { return nil, fmt.Errorf("device access at %d field cannot be empty", i) } dt, err := stringToDeviceRune(t) if err != nil { return nil, err } dd := &configs.Device{ Type: dt, Major: major, Minor: minor, Permissions: *d.Access, Allow: d.Allow, } c.Resources.Devices = append(c.Resources.Devices, dd) } // append the default allowed devices to the end of the list c.Resources.Devices = append(c.Resources.Devices, allowedDevices...) 
if r.Memory != nil { if r.Memory.Limit != nil { c.Resources.Memory = int64(*r.Memory.Limit) } if r.Memory.Reservation != nil { c.Resources.MemoryReservation = int64(*r.Memory.Reservation) } if r.Memory.Swap != nil { c.Resources.MemorySwap = int64(*r.Memory.Swap) } if r.Memory.Kernel != nil { c.Resources.KernelMemory = int64(*r.Memory.Kernel) } if r.Memory.KernelTCP != nil { c.Resources.KernelMemoryTCP = int64(*r.Memory.KernelTCP) } if r.Memory.Swappiness != nil { swappiness := int64(*r.Memory.Swappiness) c.Resources.MemorySwappiness = &swappiness } } if r.CPU != nil { if r.CPU.Shares != nil { c.Resources.CpuShares = int64(*r.CPU.Shares) } if r.CPU.Quota != nil { c.Resources.CpuQuota = int64(*r.CPU.Quota) } if r.CPU.Period != nil { c.Resources.CpuPeriod = int64(*r.CPU.Period) } if r.CPU.RealtimeRuntime != nil { c.Resources.CpuRtRuntime = int64(*r.CPU.RealtimeRuntime) } if r.CPU.RealtimePeriod != nil { c.Resources.CpuRtPeriod = int64(*r.CPU.RealtimePeriod) } if r.CPU.Cpus != nil { c.Resources.CpusetCpus = *r.CPU.Cpus } if r.CPU.Mems != nil { c.Resources.CpusetMems = *r.CPU.Mems } } if r.Pids != nil && r.Pids.Limit != nil { c.Resources.PidsLimit = *r.Pids.Limit } if r.BlockIO != nil { if r.BlockIO.Weight != nil { c.Resources.BlkioWeight = *r.BlockIO.Weight } if r.BlockIO.LeafWeight != nil { c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight } if r.BlockIO.WeightDevice != nil { for _, wd := range r.BlockIO.WeightDevice { var weight, leafWeight uint16 if wd.Weight != nil { weight = *wd.Weight } if wd.LeafWeight != nil { leafWeight = *wd.LeafWeight } weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight) c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice) } } if r.BlockIO.ThrottleReadBpsDevice != nil { for _, td := range r.BlockIO.ThrottleReadBpsDevice { var rate uint64 if td.Rate != nil { rate = *td.Rate } throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) c.Resources.BlkioThrottleReadBpsDevice 
// stringToDeviceRune maps a one-letter device-type string from the spec
// ("a" for all, "b" for block, "c" for character) to its rune form as used
// by configs.Device. Any other string is rejected with an error.
func stringToDeviceRune(s string) (rune, error) {
	switch s {
	case "a", "b", "c":
		// The rune is simply the single byte of the accepted string.
		return rune(s[0]), nil
	default:
		return 0, fmt.Errorf("invalid device type %q", s)
	}
}
// add whitelisted devices config.Devices = []*configs.Device{ { Type: 'c', Path: "/dev/null", Major: 1, Minor: 3, FileMode: 0666, Uid: 0, Gid: 0, }, { Type: 'c', Path: "/dev/random", Major: 1, Minor: 8, FileMode: 0666, Uid: 0, Gid: 0, }, { Type: 'c', Path: "/dev/full", Major: 1, Minor: 7, FileMode: 0666, Uid: 0, Gid: 0, }, { Type: 'c', Path: "/dev/tty", Major: 5, Minor: 0, FileMode: 0666, Uid: 0, Gid: 0, }, { Type: 'c', Path: "/dev/zero", Major: 1, Minor: 5, FileMode: 0666, Uid: 0, Gid: 0, }, { Type: 'c', Path: "/dev/urandom", Major: 1, Minor: 9, FileMode: 0666, Uid: 0, Gid: 0, }, } // merge in additional devices from the spec for _, d := range spec.Linux.Devices { var uid, gid uint32 var filemode os.FileMode = 0666 if d.UID != nil { uid = *d.UID } if d.GID != nil { gid = *d.GID } dt, err := stringToDeviceRune(d.Type) if err != nil { return err } if d.FileMode != nil { filemode = *d.FileMode } device := &configs.Device{ Type: dt, Path: d.Path, Major: d.Major, Minor: d.Minor, FileMode: filemode, Uid: uid, Gid: gid, } config.Devices = append(config.Devices, device) } return nil } func setupUserNamespace(spec *specs.Spec, config *configs.Config) error { if len(spec.Linux.UIDMappings) == 0 { return nil } create := func(m specs.IDMapping) configs.IDMap { return configs.IDMap{ HostID: int(m.HostID), ContainerID: int(m.ContainerID), Size: int(m.Size), } } for _, m := range spec.Linux.UIDMappings { config.UidMappings = append(config.UidMappings, create(m)) } for _, m := range spec.Linux.GIDMappings { config.GidMappings = append(config.GidMappings, create(m)) } rootUID, err := config.HostUID() if err != nil { return err } rootGID, err := config.HostGID() if err != nil { return err } for _, node := range config.Devices { node.Uid = uint32(rootUID) node.Gid = uint32(rootGID) } return nil } // parseMountOptions parses the string and returns the flags, propagation // flags and any mount data that it contains. 
func parseMountOptions(options []string) (int, []int, string, int) { var ( flag int pgflag []int data []string extFlags int ) flags := map[string]struct { clear bool flag int }{ "async": {true, syscall.MS_SYNCHRONOUS}, "atime": {true, syscall.MS_NOATIME}, "bind": {false, syscall.MS_BIND}, "defaults": {false, 0}, "dev": {true, syscall.MS_NODEV}, "diratime": {true, syscall.MS_NODIRATIME}, "dirsync": {false, syscall.MS_DIRSYNC}, "exec": {true, syscall.MS_NOEXEC}, "mand": {false, syscall.MS_MANDLOCK}, "noatime": {false, syscall.MS_NOATIME}, "nodev": {false, syscall.MS_NODEV}, "nodiratime": {false, syscall.MS_NODIRATIME}, "noexec": {false, syscall.MS_NOEXEC}, "nomand": {true, syscall.MS_MANDLOCK}, "norelatime": {true, syscall.MS_RELATIME}, "nostrictatime": {true, syscall.MS_STRICTATIME}, "nosuid": {false, syscall.MS_NOSUID}, "rbind": {false, syscall.MS_BIND | syscall.MS_REC}, "relatime": {false, syscall.MS_RELATIME}, "remount": {false, syscall.MS_REMOUNT}, "ro": {false, syscall.MS_RDONLY}, "rw": {true, syscall.MS_RDONLY}, "strictatime": {false, syscall.MS_STRICTATIME}, "suid": {true, syscall.MS_NOSUID}, "sync": {false, syscall.MS_SYNCHRONOUS}, } propagationFlags := map[string]int{ "private": syscall.MS_PRIVATE, "shared": syscall.MS_SHARED, "slave": syscall.MS_SLAVE, "unbindable": syscall.MS_UNBINDABLE, "rprivate": syscall.MS_PRIVATE | syscall.MS_REC, "rshared": syscall.MS_SHARED | syscall.MS_REC, "rslave": syscall.MS_SLAVE | syscall.MS_REC, "runbindable": syscall.MS_UNBINDABLE | syscall.MS_REC, } extensionFlags := map[string]struct { clear bool flag int }{ "tmpcopyup": {false, configs.EXT_COPYUP}, } for _, o := range options { // If the option does not exist in the flags table or the flag // is not supported on the platform, // then it is a data value for a specific fs type if f, exists := flags[o]; exists && f.flag != 0 { if f.clear { flag &= ^f.flag } else { flag |= f.flag } } else if f, exists := propagationFlags[o]; exists && f != 0 { pgflag = append(pgflag, f) } 
else if f, exists := extensionFlags[o]; exists && f.flag != 0 { if f.clear { extFlags &= ^f.flag } else { extFlags |= f.flag } } else { data = append(data, o) } } return flag, pgflag, strings.Join(data, ","), extFlags } func setupSeccomp(config *specs.Seccomp) (*configs.Seccomp, error) { if config == nil { return nil, nil } // No default action specified, no syscalls listed, assume seccomp disabled if config.DefaultAction == "" && len(config.Syscalls) == 0 { return nil, nil } newConfig := new(configs.Seccomp) newConfig.Syscalls = []*configs.Syscall{} if len(config.Architectures) > 0 { newConfig.Architectures = []string{} for _, arch := range config.Architectures { newArch, err := seccomp.ConvertStringToArch(string(arch)) if err != nil { return nil, err } newConfig.Architectures = append(newConfig.Architectures, newArch) } } // Convert default action from string representation newDefaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction)) if err != nil { return nil, err } newConfig.DefaultAction = newDefaultAction // Loop through all syscall blocks and convert them to libcontainer format for _, call := range config.Syscalls { newAction, err := seccomp.ConvertStringToAction(string(call.Action)) if err != nil { return nil, err } newCall := configs.Syscall{ Name: call.Name, Action: newAction, Args: []*configs.Arg{}, } // Loop through all the arguments of the syscall and convert them for _, arg := range call.Args { newOp, err := seccomp.ConvertStringToOperator(string(arg.Op)) if err != nil { return nil, err } newArg := configs.Arg{ Index: arg.Index, Value: arg.Value, ValueTwo: arg.ValueTwo, Op: newOp, } newCall.Args = append(newCall.Args, &newArg) } newConfig.Syscalls = append(newConfig.Syscalls, &newCall) } return newConfig, nil } func createHooks(rspec *specs.Spec, config *configs.Config) { config.Hooks = &configs.Hooks{} for _, h := range rspec.Hooks.Prestart { cmd := createCommandHook(h) config.Hooks.Prestart = append(config.Hooks.Prestart, 
configs.NewCommandHook(cmd)) } for _, h := range rspec.Hooks.Poststart { cmd := createCommandHook(h) config.Hooks.Poststart = append(config.Hooks.Poststart, configs.NewCommandHook(cmd)) } for _, h := range rspec.Hooks.Poststop { cmd := createCommandHook(h) config.Hooks.Poststop = append(config.Hooks.Poststop, configs.NewCommandHook(cmd)) } } func createCommandHook(h specs.Hook) configs.Command { cmd := configs.Command{ Path: h.Path, Args: h.Args, Env: h.Env, } if h.Timeout != nil { d := time.Duration(*h.Timeout) * time.Second cmd.Timeout = &d } return cmd } docker-runc-tags-docker-1.13.1/libcontainer/specconv/spec_linux_test.go000066400000000000000000000023361304443252500262500ustar00rootroot00000000000000// +build linux package specconv import ( "testing" "github.com/opencontainers/runtime-spec/specs-go" ) func TestLinuxCgroupsPathSpecified(t *testing.T) { cgroupsPath := "/user/cgroups/path/id" spec := &specs.Spec{} spec.Linux = &specs.Linux{ CgroupsPath: &cgroupsPath, } cgroup, err := createCgroupConfig("ContainerID", false, spec) if err != nil { t.Errorf("Couldn't create Cgroup config: %v", err) } if cgroup.Path != cgroupsPath { t.Errorf("Wrong cgroupsPath, expected '%s' got '%s'", cgroupsPath, cgroup.Path) } } func TestLinuxCgroupsPathNotSpecified(t *testing.T) { spec := &specs.Spec{} cgroup, err := createCgroupConfig("ContainerID", false, spec) if err != nil { t.Errorf("Couldn't create Cgroup config: %v", err) } if cgroup.Path != "" { t.Errorf("Wrong cgroupsPath, expected it to be empty string, got '%s'", cgroup.Path) } } func TestDupNamespaces(t *testing.T) { spec := &specs.Spec{ Linux: &specs.Linux{ Namespaces: []specs.Namespace{ { Type: "pid", }, { Type: "pid", Path: "/proc/1/ns/pid", }, }, }, } _, err := CreateLibcontainerConfig(&CreateOpts{ Spec: spec, }) if err == nil { t.Errorf("Duplicated namespaces should be forbidden") } } 
docker-runc-tags-docker-1.13.1/libcontainer/stacktrace/000077500000000000000000000000001304443252500230115ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/stacktrace/capture.go000066400000000000000000000011441304443252500250030ustar00rootroot00000000000000package stacktrace import "runtime" // Capture captures a stacktrace for the current calling go program // // skip is the number of frames to skip func Capture(userSkip int) Stacktrace { var ( skip = userSkip + 1 // add one for our own function frames []Frame prevPc uintptr ) for i := skip; ; i++ { pc, file, line, ok := runtime.Caller(i) //detect if caller is repeated to avoid loop, gccgo //currently runs into a loop without this check if !ok || pc == prevPc { break } frames = append(frames, NewFrame(pc, file, line)) prevPc = pc } return Stacktrace{ Frames: frames, } } docker-runc-tags-docker-1.13.1/libcontainer/stacktrace/capture_test.go000066400000000000000000000013741304443252500260470ustar00rootroot00000000000000package stacktrace import ( "strings" "testing" ) func captureFunc() Stacktrace { return Capture(0) } func TestCaptureTestFunc(t *testing.T) { stack := captureFunc() if len(stack.Frames) == 0 { t.Fatal("expected stack frames to be returned") } // the first frame is the caller frame := stack.Frames[0] if expected := "captureFunc"; frame.Function != expected { t.Fatalf("expteced function %q but recevied %q", expected, frame.Function) } expected := "/runc/libcontainer/stacktrace" if !strings.HasSuffix(frame.Package, expected) { t.Fatalf("expected package %q but received %q", expected, frame.Package) } if expected := "capture_test.go"; frame.File != expected { t.Fatalf("expected file %q but received %q", expected, frame.File) } } docker-runc-tags-docker-1.13.1/libcontainer/stacktrace/frame.go000066400000000000000000000013341304443252500244330ustar00rootroot00000000000000package stacktrace import ( "path/filepath" "runtime" "strings" ) // NewFrame returns a new stack frame for the 
provided information func NewFrame(pc uintptr, file string, line int) Frame { fn := runtime.FuncForPC(pc) if fn == nil { return Frame{} } pack, name := parseFunctionName(fn.Name()) return Frame{ Line: line, File: filepath.Base(file), Package: pack, Function: name, } } func parseFunctionName(name string) (string, string) { i := strings.LastIndex(name, ".") if i == -1 { return "", name } return name[:i], name[i+1:] } // Frame contains all the information for a stack frame within a go program type Frame struct { File string Function string Package string Line int } docker-runc-tags-docker-1.13.1/libcontainer/stacktrace/frame_test.go000066400000000000000000000010631304443252500254710ustar00rootroot00000000000000package stacktrace import "testing" func TestParsePackageName(t *testing.T) { var ( name = "github.com/opencontainers/runc/libcontainer/stacktrace.captureFunc" expectedPackage = "github.com/opencontainers/runc/libcontainer/stacktrace" expectedFunction = "captureFunc" ) pack, funcName := parseFunctionName(name) if pack != expectedPackage { t.Fatalf("expected package %q but received %q", expectedPackage, pack) } if funcName != expectedFunction { t.Fatalf("expected function %q but received %q", expectedFunction, funcName) } } docker-runc-tags-docker-1.13.1/libcontainer/stacktrace/stacktrace.go000066400000000000000000000000771304443252500254700ustar00rootroot00000000000000package stacktrace type Stacktrace struct { Frames []Frame } docker-runc-tags-docker-1.13.1/libcontainer/standard_init_linux.go000066400000000000000000000126571304443252500252710ustar00rootroot00000000000000// +build linux package libcontainer import ( "fmt" "io" "os" "os/exec" "syscall" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/keys" "github.com/opencontainers/runc/libcontainer/label" "github.com/opencontainers/runc/libcontainer/seccomp" 
"github.com/opencontainers/runc/libcontainer/system" ) type linuxStandardInit struct { pipe io.ReadWriteCloser parentPid int stateDirFD int config *initConfig } func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { var newperms uint32 if l.config.Config.Namespaces.Contains(configs.NEWUSER) { // with user ns we need 'other' search permissions newperms = 0x8 } else { // without user ns we need 'UID' search permissions newperms = 0x80000 } // create a unique per session container name that we can // join in setns; however, other containers can also join it return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms } // PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value // the kernel const PR_SET_NO_NEW_PRIVS = 0x26 func (l *linuxStandardInit) Init() error { if !l.config.Config.NoNewKeyring { ringname, keepperms, newperms := l.getSessionRingParams() // do not inherit the parent's session keyring sessKeyId, err := keys.JoinSessionKeyring(ringname) if err != nil { return err } // make session keyring searcheable if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { return err } } var console *linuxConsole if l.config.Console != "" { console = newConsoleFromPath(l.config.Console) if err := console.dupStdio(); err != nil { return err } } if console != nil { if err := system.Setctty(); err != nil { return err } } if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { return err } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := setupRootfs(l.config.Config, console, l.pipe); err != nil { return err } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { return err } 
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { return err } for key, value := range l.config.Config.Sysctl { if err := writeSystemProperty(key, value); err != nil { return err } } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { return err } } for _, path := range l.config.Config.MaskPaths { if err := maskPath(path); err != nil { return err } } pdeath, err := system.GetParentDeathSignal() if err != nil { return err } if l.config.NoNewPrivileges { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { return err } } // Tell our parent that we're ready to Execv. This must be done before the // Seccomp rules have been applied, because we need to be able to read and // write to a socket. if err := syncParentReady(l.pipe); err != nil { return err } // Without NoNewPrivileges seccomp is a privileged operation, so we need to // do this before dropping capabilities; otherwise do it as late as possible // just before execve so as few syscalls take place after it as possible. if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return err } } if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { return err } // compare the parent from the initial start of the init process and make sure that it did not change. // if the parent changes that means it died and we were reparented to something else so we should // just kill ourself and not cause problems for someone else. if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } // check for the arg before waiting to make sure it exists and it is returned // as a create time error. 
name, err := exec.LookPath(l.config.Args[0]) if err != nil { return err } // close the pipe to signal that we have completed our init. l.pipe.Close() // wait for the fifo to be opened on the other side before // exec'ing the users process. fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0) if err != nil { return newSystemErrorWithCause(err, "openat exec fifo") } if _, err := syscall.Write(fd, []byte("0")); err != nil { return newSystemErrorWithCause(err, "write 0 exec fifo") } if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return newSystemErrorWithCause(err, "init seccomp") } } // close the statedir fd before exec because the kernel resets dumpable in the wrong order // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318 syscall.Close(l.stateDirFD) if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil { return newSystemErrorWithCause(err, "exec user process") } return nil } docker-runc-tags-docker-1.13.1/libcontainer/state_linux.go000066400000000000000000000123101304443252500235500ustar00rootroot00000000000000// +build linux package libcontainer import ( "fmt" "os" "path/filepath" "syscall" "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/utils" ) func newStateTransitionError(from, to containerState) error { return &stateTransitionError{ From: from.status().String(), To: to.status().String(), } } // stateTransitionError is returned when an invalid state transition happens from one // state to another. 
type stateTransitionError struct { From string To string } func (s *stateTransitionError) Error() string { return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To) } type containerState interface { transition(containerState) error destroy() error status() Status } func destroy(c *linuxContainer) error { if !c.config.Namespaces.Contains(configs.NEWPID) { if err := signalAllProcesses(c.cgroupManager, syscall.SIGKILL); err != nil { logrus.Warn(err) } } err := c.cgroupManager.Destroy() if rerr := os.RemoveAll(c.root); err == nil { err = rerr } c.initProcess = nil if herr := runPoststopHooks(c); err == nil { err = herr } c.state = &stoppedState{c: c} return err } func runPoststopHooks(c *linuxContainer) error { if c.config.Hooks != nil { s := configs.HookState{ Version: c.config.Version, ID: c.id, Root: c.config.Rootfs, BundlePath: utils.SearchLabels(c.config.Labels, "bundle"), } for _, hook := range c.config.Hooks.Poststop { if err := hook.Run(s); err != nil { return err } } } return nil } // stoppedState represents a container is a stopped/destroyed state. type stoppedState struct { c *linuxContainer } func (b *stoppedState) status() Status { return Stopped } func (b *stoppedState) transition(s containerState) error { switch s.(type) { case *runningState, *restoredState: b.c.state = s return nil case *stoppedState: return nil } return newStateTransitionError(b, s) } func (b *stoppedState) destroy() error { return destroy(b.c) } // runningState represents a container that is currently running. 
type runningState struct { c *linuxContainer } func (r *runningState) status() Status { return Running } func (r *runningState) transition(s containerState) error { switch s.(type) { case *stoppedState: t, err := r.c.runType() if err != nil { return err } if t == Running { return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped) } r.c.state = s return nil case *pausedState: r.c.state = s return nil case *runningState: return nil } return newStateTransitionError(r, s) } func (r *runningState) destroy() error { t, err := r.c.runType() if err != nil { return err } if t == Running { return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped) } return destroy(r.c) } type createdState struct { c *linuxContainer } func (i *createdState) status() Status { return Created } func (i *createdState) transition(s containerState) error { switch s.(type) { case *runningState, *pausedState, *stoppedState: i.c.state = s return nil case *createdState: return nil } return newStateTransitionError(i, s) } func (i *createdState) destroy() error { i.c.initProcess.signal(syscall.SIGKILL) return destroy(i.c) } // pausedState represents a container that is currently pause. It cannot be destroyed in a // paused state and must transition back to running first. 
type pausedState struct { c *linuxContainer } func (p *pausedState) status() Status { return Paused } func (p *pausedState) transition(s containerState) error { switch s.(type) { case *runningState, *stoppedState: p.c.state = s return nil case *pausedState: return nil } return newStateTransitionError(p, s) } func (p *pausedState) destroy() error { t, err := p.c.runType() if err != nil { return err } if t != Running && t != Created { if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil { return err } return destroy(p.c) } return newGenericError(fmt.Errorf("container is paused"), ContainerPaused) } // restoredState is the same as the running state but also has accociated checkpoint // information that maybe need destroyed when the container is stopped and destroy is called. type restoredState struct { imageDir string c *linuxContainer } func (r *restoredState) status() Status { return Running } func (r *restoredState) transition(s containerState) error { switch s.(type) { case *stoppedState, *runningState: return nil } return newStateTransitionError(r, s) } func (r *restoredState) destroy() error { if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil { if !os.IsNotExist(err) { return err } } return destroy(r.c) } // loadedState is used whenever a container is restored, loaded, or setting additional // processes inside and it should not be destroyed when it is exiting. 
type loadedState struct { c *linuxContainer s Status } func (n *loadedState) status() Status { return n.s } func (n *loadedState) transition(s containerState) error { n.c.state = s return nil } func (n *loadedState) destroy() error { if err := n.c.refreshState(); err != nil { return err } return n.c.state.destroy() } docker-runc-tags-docker-1.13.1/libcontainer/state_linux_test.go000066400000000000000000000032421304443252500246130ustar00rootroot00000000000000// +build linux package libcontainer import "testing" func TestStateStatus(t *testing.T) { states := map[containerState]Status{ &stoppedState{}: Stopped, &runningState{}: Running, &restoredState{}: Running, &pausedState{}: Paused, &createdState{}: Created, } for s, status := range states { if s.status() != status { t.Fatalf("state returned %s but expected %s", s.status(), status) } } } func isStateTransitionError(err error) bool { _, ok := err.(*stateTransitionError) return ok } func TestStoppedStateTransition(t *testing.T) { s := &stoppedState{c: &linuxContainer{}} valid := []containerState{ &stoppedState{}, &runningState{}, &restoredState{}, } for _, v := range valid { if err := s.transition(v); err != nil { t.Fatal(err) } } err := s.transition(&pausedState{}) if err == nil { t.Fatal("transition to paused state should fail") } if !isStateTransitionError(err) { t.Fatal("expected stateTransitionError") } } func TestPausedStateTransition(t *testing.T) { s := &pausedState{c: &linuxContainer{}} valid := []containerState{ &pausedState{}, &runningState{}, &stoppedState{}, } for _, v := range valid { if err := s.transition(v); err != nil { t.Fatal(err) } } } func TestRestoredStateTransition(t *testing.T) { s := &restoredState{c: &linuxContainer{}} valid := []containerState{ &stoppedState{}, &runningState{}, } for _, v := range valid { if err := s.transition(v); err != nil { t.Fatal(err) } } err := s.transition(&createdState{}) if err == nil { t.Fatal("transition to created state should fail") } if 
!isStateTransitionError(err) { t.Fatal("expected stateTransitionError") } } docker-runc-tags-docker-1.13.1/libcontainer/stats.go000066400000000000000000000004041304443252500223500ustar00rootroot00000000000000package libcontainer type NetworkInterface struct { // Name is the name of the network interface. Name string RxBytes uint64 RxPackets uint64 RxErrors uint64 RxDropped uint64 TxBytes uint64 TxPackets uint64 TxErrors uint64 TxDropped uint64 } docker-runc-tags-docker-1.13.1/libcontainer/stats_freebsd.go000066400000000000000000000001141304443252500240400ustar00rootroot00000000000000package libcontainer type Stats struct { Interfaces []*NetworkInterface } docker-runc-tags-docker-1.13.1/libcontainer/stats_linux.go000066400000000000000000000002471304443252500235740ustar00rootroot00000000000000package libcontainer import "github.com/opencontainers/runc/libcontainer/cgroups" type Stats struct { Interfaces []*NetworkInterface CgroupStats *cgroups.Stats } docker-runc-tags-docker-1.13.1/libcontainer/stats_solaris.go000066400000000000000000000001371304443252500241070ustar00rootroot00000000000000package libcontainer // Solaris - TODO type Stats struct { Interfaces []*NetworkInterface } docker-runc-tags-docker-1.13.1/libcontainer/stats_windows.go000066400000000000000000000001141304443252500241200ustar00rootroot00000000000000package libcontainer type Stats struct { Interfaces []*NetworkInterface } docker-runc-tags-docker-1.13.1/libcontainer/system/000077500000000000000000000000001304443252500222115ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/system/linux.go000066400000000000000000000067271304443252500237130ustar00rootroot00000000000000// +build linux package system import ( "bufio" "fmt" "os" "os/exec" "syscall" "unsafe" ) // If arg2 is nonzero, set the "child subreaper" attribute of the // calling process; if arg2 is zero, unset the attribute. 
When a // process is marked as a child subreaper, all of the children // that it creates, and their descendants, will be marked as // having a subreaper. In effect, a subreaper fulfills the role // of init(1) for its descendant processes. Upon termination of // a process that is orphaned (i.e., its immediate parent has // already terminated) and marked as having a subreaper, the // nearest still living ancestor subreaper will receive a SIGCHLD // signal and be able to wait(2) on the process to discover its // termination status. const PR_SET_CHILD_SUBREAPER = 36 type ParentDeathSignal int func (p ParentDeathSignal) Restore() error { if p == 0 { return nil } current, err := GetParentDeathSignal() if err != nil { return err } if p == current { return nil } return p.Set() } func (p ParentDeathSignal) Set() error { return SetParentDeathSignal(uintptr(p)) } func Execv(cmd string, args []string, env []string) error { name, err := exec.LookPath(cmd) if err != nil { return err } return syscall.Exec(name, args, env) } func Prlimit(pid, resource int, limit syscall.Rlimit) error { _, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0) if err != 0 { return err } return nil } func SetParentDeathSignal(sig uintptr) error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 { return err } return nil } func GetParentDeathSignal() (ParentDeathSignal, error) { var sig int _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0) if err != 0 { return -1, err } return ParentDeathSignal(sig), nil } func SetKeepCaps() error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 { return err } return nil } func ClearKeepCaps() error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 { return err } 
return nil } func Setctty() error { if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 { return err } return nil } // RunningInUserNS detects whether we are currently running in a user namespace. // Copied from github.com/lxc/lxd/shared/util.go func RunningInUserNS() bool { file, err := os.Open("/proc/self/uid_map") if err != nil { // This kernel-provided file only exists if user namespaces are supported return false } defer file.Close() buf := bufio.NewReader(file) l, _, err := buf.ReadLine() if err != nil { return false } line := string(l) var a, b, c int64 fmt.Sscanf(line, "%d %d %d", &a, &b, &c) /* * We assume we are in the initial user namespace if we have a full * range - 4294967295 uids starting at uid 0. */ if a == 0 && b == 0 && c == 4294967295 { return false } return true } // SetSubreaper sets the value i as the subreaper setting for the calling process func SetSubreaper(i int) error { return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) } func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) { _, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) if e1 != 0 { err = e1 } return } docker-runc-tags-docker-1.13.1/libcontainer/system/proc.go000066400000000000000000000026441304443252500235110ustar00rootroot00000000000000package system import ( "io/ioutil" "path/filepath" "strconv" "strings" ) // look in /proc to find the process start time so that we can verify // that this pid has started after ourself func GetProcessStartTime(pid int) (string, error) { data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) if err != nil { return "", err } return parseStartTime(string(data)) } func parseStartTime(stat string) (string, error) { // the starttime is located at pos 22 // from the man page // // starttime %llu (was %lu before Linux 2.6) // (22) The time the process started after system boot. 
In kernels before Linux 2.6, this // value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks // (divide by sysconf(_SC_CLK_TCK)). // // NOTE: // pos 2 could contain space and is inside `(` and `)`: // (2) comm %s // The filename of the executable, in parentheses. // This is visible whether or not the executable is // swapped out. // // the following is an example: // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 // get parts after last `)`: s := strings.Split(stat, ")") parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ") return parts[22-3], nil // starts at 3 (after the filename pos `2`) } docker-runc-tags-docker-1.13.1/libcontainer/system/proc_test.go000066400000000000000000000025301304443252500245420ustar00rootroot00000000000000package system import "testing" func TestParseStartTime(t *testing.T) { data := map[string]string{ "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": "9126532", "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": "9214966", "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": "8722075", } for line, startTime := range data { st, err := parseStartTime(line) if err != nil { t.Fatal(err) } if startTime != 
st { t.Fatalf("expected start time %q but received %q", startTime, st) } } } docker-runc-tags-docker-1.13.1/libcontainer/system/setns_linux.go000066400000000000000000000017611304443252500251200ustar00rootroot00000000000000package system import ( "fmt" "runtime" "syscall" ) // Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 // // We need different setns values for the different platforms and arch // We are declaring the macro here because the SETNS syscall does not exist in th stdlib var setNsMap = map[string]uintptr{ "linux/386": 346, "linux/arm64": 268, "linux/amd64": 308, "linux/arm": 375, "linux/ppc": 350, "linux/ppc64": 350, "linux/ppc64le": 350, "linux/s390x": 339, } var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] func SysSetns() uint32 { return uint32(sysSetns) } func Setns(fd uintptr, flags uintptr) error { ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] if !exists { return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) } _, _, err := syscall.RawSyscall(ns, fd, flags, 0) if err != 0 { return err } return nil } docker-runc-tags-docker-1.13.1/libcontainer/system/syscall_linux_386.go000066400000000000000000000007511304443252500260340ustar00rootroot00000000000000// +build linux,386 package system import ( "syscall" ) // Setuid sets the uid of the calling thread to the specified uid. func Setuid(uid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0) if e1 != 0 { err = e1 } return } // Setgid sets the gid of the calling thread to the specified gid. 
func Setgid(gid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0) if e1 != 0 { err = e1 } return } docker-runc-tags-docker-1.13.1/libcontainer/system/syscall_linux_64.go000066400000000000000000000010431304443252500257400ustar00rootroot00000000000000// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x package system import ( "syscall" ) // Setuid sets the uid of the calling thread to the specified uid. func Setuid(uid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) if e1 != 0 { err = e1 } return } // Setgid sets the gid of the calling thread to the specified gid. func Setgid(gid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0) if e1 != 0 { err = e1 } return } docker-runc-tags-docker-1.13.1/libcontainer/system/syscall_linux_arm.go000066400000000000000000000007511304443252500262730ustar00rootroot00000000000000// +build linux,arm package system import ( "syscall" ) // Setuid sets the uid of the calling thread to the specified uid. func Setuid(uid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0) if e1 != 0 { err = e1 } return } // Setgid sets the gid of the calling thread to the specified gid. 
func Setgid(gid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0) if e1 != 0 { err = e1 } return } docker-runc-tags-docker-1.13.1/libcontainer/system/sysconfig.go000066400000000000000000000002321304443252500245410ustar00rootroot00000000000000// +build cgo,linux cgo,freebsd package system /* #include */ import "C" func GetClockTicks() int { return int(C.sysconf(C._SC_CLK_TCK)) } docker-runc-tags-docker-1.13.1/libcontainer/system/sysconfig_notcgo.go000066400000000000000000000007251304443252500261210ustar00rootroot00000000000000// +build !cgo windows package system func GetClockTicks() int { // TODO figure out a better alternative for platforms where we're missing cgo // // TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency(). // https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx // // An example of its usage can be found here. // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx return 100 } docker-runc-tags-docker-1.13.1/libcontainer/system/unsupported.go000066400000000000000000000002331304443252500251260ustar00rootroot00000000000000// +build !linux package system // RunningInUserNS is a stub for non-Linux systems // Always returns false func RunningInUserNS() bool { return false } docker-runc-tags-docker-1.13.1/libcontainer/system/xattrs_linux.go000066400000000000000000000054551304443252500253150ustar00rootroot00000000000000package system import ( "syscall" "unsafe" ) var _zero uintptr // Returns the size of xattrs and nil error // Requires path, takes allocated []byte or nil as last argument func Llistxattr(path string, dest []byte) (size int, err error) { pathBytes, err := syscall.BytePtrFromString(path) if err != nil { return -1, err } var newpathBytes unsafe.Pointer if len(dest) > 0 { newpathBytes = unsafe.Pointer(&dest[0]) } else { newpathBytes = unsafe.Pointer(&_zero) } _size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, 
uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0) size = int(_size) if errno != 0 { return -1, errno } return size, nil } // Returns a []byte slice if the xattr is set and nil otherwise // Requires path and its attribute as arguments func Lgetxattr(path string, attr string) ([]byte, error) { var sz int pathBytes, err := syscall.BytePtrFromString(path) if err != nil { return nil, err } attrBytes, err := syscall.BytePtrFromString(attr) if err != nil { return nil, err } // Start with a 128 length byte array sz = 128 dest := make([]byte, sz) destBytes := unsafe.Pointer(&dest[0]) _sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) switch { case errno == syscall.ENODATA: return nil, errno case errno == syscall.ENOTSUP: return nil, errno case errno == syscall.ERANGE: // 128 byte array might just not be good enough, // A dummy buffer is used ``uintptr(0)`` to get real size // of the xattrs on disk _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0) sz = int(_sz) if sz < 0 { return nil, errno } dest = make([]byte, sz) destBytes := unsafe.Pointer(&dest[0]) _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) if errno != 0 { return nil, errno } case errno != 0: return nil, errno } sz = int(_sz) return dest[:sz], nil } func Lsetxattr(path string, attr string, data []byte, flags int) error { pathBytes, err := syscall.BytePtrFromString(path) if err != nil { return err } attrBytes, err := syscall.BytePtrFromString(attr) if err != nil { return err } var dataBytes unsafe.Pointer if len(data) > 0 { dataBytes = unsafe.Pointer(&data[0]) } else { dataBytes = unsafe.Pointer(&_zero) } _, 
_, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0) if errno != 0 { return errno } return nil } docker-runc-tags-docker-1.13.1/libcontainer/user/000077500000000000000000000000001304443252500216435ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/user/MAINTAINERS000066400000000000000000000001301304443252500233320ustar00rootroot00000000000000Tianon Gravi (@tianon) Aleksa Sarai (@cyphar) docker-runc-tags-docker-1.13.1/libcontainer/user/lookup.go000066400000000000000000000061161304443252500235070ustar00rootroot00000000000000package user import ( "errors" "syscall" ) var ( // The current operating system does not provide the required data for user lookups. ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data") // No matching entries found in file. ErrNoPasswdEntries = errors.New("no matching entries in passwd file") ErrNoGroupEntries = errors.New("no matching entries in group file") ) func lookupUser(filter func(u User) bool) (User, error) { // Get operating system-specific passwd reader-closer. passwd, err := GetPasswd() if err != nil { return User{}, err } defer passwd.Close() // Get the users. users, err := ParsePasswdFilter(passwd, filter) if err != nil { return User{}, err } // No user entries found. if len(users) == 0 { return User{}, ErrNoPasswdEntries } // Assume the first entry is the "correct" one. return users[0], nil } // CurrentUser looks up the current user by their user id in /etc/passwd. If the // user cannot be found (or there is no /etc/passwd file on the filesystem), // then CurrentUser returns an error. func CurrentUser() (User, error) { return LookupUid(syscall.Getuid()) } // LookupUser looks up a user by their username in /etc/passwd. If the user // cannot be found (or there is no /etc/passwd file on the filesystem), then // LookupUser returns an error. 
func LookupUser(username string) (User, error) { return lookupUser(func(u User) bool { return u.Name == username }) } // LookupUid looks up a user by their user id in /etc/passwd. If the user cannot // be found (or there is no /etc/passwd file on the filesystem), then LookupId // returns an error. func LookupUid(uid int) (User, error) { return lookupUser(func(u User) bool { return u.Uid == uid }) } func lookupGroup(filter func(g Group) bool) (Group, error) { // Get operating system-specific group reader-closer. group, err := GetGroup() if err != nil { return Group{}, err } defer group.Close() // Get the users. groups, err := ParseGroupFilter(group, filter) if err != nil { return Group{}, err } // No user entries found. if len(groups) == 0 { return Group{}, ErrNoGroupEntries } // Assume the first entry is the "correct" one. return groups[0], nil } // CurrentGroup looks up the current user's group by their primary group id's // entry in /etc/passwd. If the group cannot be found (or there is no // /etc/group file on the filesystem), then CurrentGroup returns an error. func CurrentGroup() (Group, error) { return LookupGid(syscall.Getgid()) } // LookupGroup looks up a group by its name in /etc/group. If the group cannot // be found (or there is no /etc/group file on the filesystem), then LookupGroup // returns an error. func LookupGroup(groupname string) (Group, error) { return lookupGroup(func(g Group) bool { return g.Name == groupname }) } // LookupGid looks up a group by its group id in /etc/group. If the group cannot // be found (or there is no /etc/group file on the filesystem), then LookupGid // returns an error. 
func LookupGid(gid int) (Group, error) { return lookupGroup(func(g Group) bool { return g.Gid == gid }) } docker-runc-tags-docker-1.13.1/libcontainer/user/lookup_unix.go000066400000000000000000000010211304443252500245400ustar00rootroot00000000000000// +build darwin dragonfly freebsd linux netbsd openbsd solaris package user import ( "io" "os" ) // Unix-specific path to the passwd and group formatted files. const ( unixPasswdPath = "/etc/passwd" unixGroupPath = "/etc/group" ) func GetPasswdPath() (string, error) { return unixPasswdPath, nil } func GetPasswd() (io.ReadCloser, error) { return os.Open(unixPasswdPath) } func GetGroupPath() (string, error) { return unixGroupPath, nil } func GetGroup() (io.ReadCloser, error) { return os.Open(unixGroupPath) } docker-runc-tags-docker-1.13.1/libcontainer/user/lookup_unsupported.go000066400000000000000000000005741304443252500261610ustar00rootroot00000000000000// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris package user import "io" func GetPasswdPath() (string, error) { return "", ErrUnsupported } func GetPasswd() (io.ReadCloser, error) { return nil, ErrUnsupported } func GetGroupPath() (string, error) { return "", ErrUnsupported } func GetGroup() (io.ReadCloser, error) { return nil, ErrUnsupported } docker-runc-tags-docker-1.13.1/libcontainer/user/user.go000066400000000000000000000253531304443252500231600ustar00rootroot00000000000000package user import ( "bufio" "fmt" "io" "os" "strconv" "strings" ) const ( minId = 0 maxId = 1<<31 - 1 //for 32-bit systems compatibility ) var ( ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId) ) type User struct { Name string Pass string Uid int Gid int Gecos string Home string Shell string } type Group struct { Name string Pass string Gid int List []string } func parseLine(line string, v ...interface{}) { if line == "" { return } parts := strings.Split(line, ":") for i, p := range parts { // Ignore cases where we don't have enough fields to 
populate the arguments. // Some configuration files like to misbehave. if len(v) <= i { break } // Use the type of the argument to figure out how to parse it, scanf() style. // This is legit. switch e := v[i].(type) { case *string: *e = p case *int: // "numbers", with conversion errors ignored because of some misbehaving configuration files. *e, _ = strconv.Atoi(p) case *[]string: // Comma-separated lists. if p != "" { *e = strings.Split(p, ",") } else { *e = []string{} } default: // Someone goof'd when writing code using this function. Scream so they can hear us. panic(fmt.Sprintf("parseLine only accepts {*string, *int, *[]string} as arguments! %#v is not a pointer!", e)) } } } func ParsePasswdFile(path string) ([]User, error) { passwd, err := os.Open(path) if err != nil { return nil, err } defer passwd.Close() return ParsePasswd(passwd) } func ParsePasswd(passwd io.Reader) ([]User, error) { return ParsePasswdFilter(passwd, nil) } func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) { passwd, err := os.Open(path) if err != nil { return nil, err } defer passwd.Close() return ParsePasswdFilter(passwd, filter) } func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) { if r == nil { return nil, fmt.Errorf("nil source for passwd-formatted data") } var ( s = bufio.NewScanner(r) out = []User{} ) for s.Scan() { if err := s.Err(); err != nil { return nil, err } line := strings.TrimSpace(s.Text()) if line == "" { continue } // see: man 5 passwd // name:password:UID:GID:GECOS:directory:shell // Name:Pass:Uid:Gid:Gecos:Home:Shell // root:x:0:0:root:/root:/bin/bash // adm:x:3:4:adm:/var/adm:/bin/false p := User{} parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell) if filter == nil || filter(p) { out = append(out, p) } } return out, nil } func ParseGroupFile(path string) ([]Group, error) { group, err := os.Open(path) if err != nil { return nil, err } defer group.Close() return ParseGroup(group) } func 
ParseGroup(group io.Reader) ([]Group, error) { return ParseGroupFilter(group, nil) } func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) { group, err := os.Open(path) if err != nil { return nil, err } defer group.Close() return ParseGroupFilter(group, filter) } func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) { if r == nil { return nil, fmt.Errorf("nil source for group-formatted data") } var ( s = bufio.NewScanner(r) out = []Group{} ) for s.Scan() { if err := s.Err(); err != nil { return nil, err } text := s.Text() if text == "" { continue } // see: man 5 group // group_name:password:GID:user_list // Name:Pass:Gid:List // root:x:0:root // adm:x:4:root,adm,daemon p := Group{} parseLine(text, &p.Name, &p.Pass, &p.Gid, &p.List) if filter == nil || filter(p) { out = append(out, p) } } return out, nil } type ExecUser struct { Uid int Gid int Sgids []int Home string } // GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the // given file paths and uses that data as the arguments to GetExecUser. If the // files cannot be opened for any reason, the error is ignored and a nil // io.Reader is passed instead. func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { passwd, err := os.Open(passwdPath) if err != nil { passwd = nil } else { defer passwd.Close() } group, err := os.Open(groupPath) if err != nil { group = nil } else { defer group.Close() } return GetExecUser(userSpec, defaults, passwd, group) } // GetExecUser parses a user specification string (using the passwd and group // readers as sources for /etc/passwd and /etc/group data, respectively). In // the case of blank fields or missing data from the sources, the values in // defaults is used. // // GetExecUser will return an error if a user or group literal could not be // found in any entry in passwd and group respectively. 
// // Examples of valid user specifications are: // * "" // * "user" // * "uid" // * "user:group" // * "uid:gid // * "user:gid" // * "uid:group" // // It should be noted that if you specify a numeric user or group id, they will // not be evaluated as usernames (only the metadata will be filled). So attempting // to parse a user with user.Name = "1337" will produce the user with a UID of // 1337. func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) { if defaults == nil { defaults = new(ExecUser) } // Copy over defaults. user := &ExecUser{ Uid: defaults.Uid, Gid: defaults.Gid, Sgids: defaults.Sgids, Home: defaults.Home, } // Sgids slice *cannot* be nil. if user.Sgids == nil { user.Sgids = []int{} } // Allow for userArg to have either "user" syntax, or optionally "user:group" syntax var userArg, groupArg string parseLine(userSpec, &userArg, &groupArg) // Convert userArg and groupArg to be numeric, so we don't have to execute // Atoi *twice* for each iteration over lines. uidArg, uidErr := strconv.Atoi(userArg) gidArg, gidErr := strconv.Atoi(groupArg) // Find the matching user. users, err := ParsePasswdFilter(passwd, func(u User) bool { if userArg == "" { // Default to current state of the user. return u.Uid == user.Uid } if uidErr == nil { // If the userArg is numeric, always treat it as a UID. return uidArg == u.Uid } return u.Name == userArg }) // If we can't find the user, we have to bail. if err != nil && passwd != nil { if userArg == "" { userArg = strconv.Itoa(user.Uid) } return nil, fmt.Errorf("unable to find user %s: %v", userArg, err) } var matchedUserName string if len(users) > 0 { // First match wins, even if there's more than one matching entry. 
matchedUserName = users[0].Name user.Uid = users[0].Uid user.Gid = users[0].Gid user.Home = users[0].Home } else if userArg != "" { // If we can't find a user with the given username, the only other valid // option is if it's a numeric username with no associated entry in passwd. if uidErr != nil { // Not numeric. return nil, fmt.Errorf("unable to find user %s: %v", userArg, ErrNoPasswdEntries) } user.Uid = uidArg // Must be inside valid uid range. if user.Uid < minId || user.Uid > maxId { return nil, ErrRange } // Okay, so it's numeric. We can just roll with this. } // On to the groups. If we matched a username, we need to do this because of // the supplementary group IDs. if groupArg != "" || matchedUserName != "" { groups, err := ParseGroupFilter(group, func(g Group) bool { // If the group argument isn't explicit, we'll just search for it. if groupArg == "" { // Check if user is a member of this group. for _, u := range g.List { if u == matchedUserName { return true } } return false } if gidErr == nil { // If the groupArg is numeric, always treat it as a GID. return gidArg == g.Gid } return g.Name == groupArg }) if err != nil && group != nil { return nil, fmt.Errorf("unable to find groups for spec %v: %v", matchedUserName, err) } // Only start modifying user.Gid if it is in explicit form. if groupArg != "" { if len(groups) > 0 { // First match wins, even if there's more than one matching entry. user.Gid = groups[0].Gid } else if groupArg != "" { // If we can't find a group with the given name, the only other valid // option is if it's a numeric group name with no associated entry in group. if gidErr != nil { // Not numeric. return nil, fmt.Errorf("unable to find group %s: %v", groupArg, ErrNoGroupEntries) } user.Gid = gidArg // Must be inside valid gid range. if user.Gid < minId || user.Gid > maxId { return nil, ErrRange } // Okay, so it's numeric. We can just roll with this. 
} } else if len(groups) > 0 { // Supplementary group ids only make sense if in the implicit form. user.Sgids = make([]int, len(groups)) for i, group := range groups { user.Sgids[i] = group.Gid } } } return user, nil } // GetAdditionalGroups looks up a list of groups by name or group id // against the given /etc/group formatted data. If a group name cannot // be found, an error will be returned. If a group id cannot be found, // or the given group data is nil, the id will be returned as-is // provided it is in the legal range. func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) { var groups = []Group{} if group != nil { var err error groups, err = ParseGroupFilter(group, func(g Group) bool { for _, ag := range additionalGroups { if g.Name == ag || strconv.Itoa(g.Gid) == ag { return true } } return false }) if err != nil { return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err) } } gidMap := make(map[int]struct{}) for _, ag := range additionalGroups { var found bool for _, g := range groups { // if we found a matched group either by name or gid, take the // first matched as correct if g.Name == ag || strconv.Itoa(g.Gid) == ag { if _, ok := gidMap[g.Gid]; !ok { gidMap[g.Gid] = struct{}{} found = true break } } } // we asked for a group but didn't find it. let's check to see // if we wanted a numeric group if !found { gid, err := strconv.Atoi(ag) if err != nil { return nil, fmt.Errorf("Unable to find group %s", ag) } // Ensure gid is inside gid range. if gid < minId || gid > maxId { return nil, ErrRange } gidMap[gid] = struct{}{} } } gids := []int{} for gid := range gidMap { gids = append(gids, gid) } return gids, nil } // GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups // that opens the groupPath given and gives it as an argument to // GetAdditionalGroups. 
func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { group, err := os.Open(groupPath) if err == nil { defer group.Close() } return GetAdditionalGroups(additionalGroups, group) } docker-runc-tags-docker-1.13.1/libcontainer/user/user_test.go000066400000000000000000000243321304443252500242130ustar00rootroot00000000000000package user import ( "io" "reflect" "sort" "strconv" "strings" "testing" "github.com/opencontainers/runc/libcontainer/utils" ) func TestUserParseLine(t *testing.T) { var ( a, b string c []string d int ) parseLine("", &a, &b) if a != "" || b != "" { t.Fatalf("a and b should be empty ('%v', '%v')", a, b) } parseLine("a", &a, &b) if a != "a" || b != "" { t.Fatalf("a should be 'a' and b should be empty ('%v', '%v')", a, b) } parseLine("bad boys:corny cows", &a, &b) if a != "bad boys" || b != "corny cows" { t.Fatalf("a should be 'bad boys' and b should be 'corny cows' ('%v', '%v')", a, b) } parseLine("", &c) if len(c) != 0 { t.Fatalf("c should be empty (%#v)", c) } parseLine("d,e,f:g:h:i,j,k", &c, &a, &b, &c) if a != "g" || b != "h" || len(c) != 3 || c[0] != "i" || c[1] != "j" || c[2] != "k" { t.Fatalf("a should be 'g', b should be 'h', and c should be ['i','j','k'] ('%v', '%v', '%#v')", a, b, c) } parseLine("::::::::::", &a, &b, &c) if a != "" || b != "" || len(c) != 0 { t.Fatalf("a, b, and c should all be empty ('%v', '%v', '%#v')", a, b, c) } parseLine("not a number", &d) if d != 0 { t.Fatalf("d should be 0 (%v)", d) } parseLine("b:12:c", &a, &d, &b) if a != "b" || b != "c" || d != 12 { t.Fatalf("a should be 'b' and b should be 'c', and d should be 12 ('%v', '%v', %v)", a, b, d) } } func TestUserParsePasswd(t *testing.T) { users, err := ParsePasswdFilter(strings.NewReader(` root:x:0:0:root:/root:/bin/bash adm:x:3:4:adm:/var/adm:/bin/false this is just some garbage data `), nil) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(users) != 3 { t.Fatalf("Expected 3 users, got %v", len(users)) } if 
users[0].Uid != 0 || users[0].Name != "root" { t.Fatalf("Expected users[0] to be 0 - root, got %v - %v", users[0].Uid, users[0].Name) } if users[1].Uid != 3 || users[1].Name != "adm" { t.Fatalf("Expected users[1] to be 3 - adm, got %v - %v", users[1].Uid, users[1].Name) } } func TestUserParseGroup(t *testing.T) { groups, err := ParseGroupFilter(strings.NewReader(` root:x:0:root adm:x:4:root,adm,daemon this is just some garbage data `), nil) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(groups) != 3 { t.Fatalf("Expected 3 groups, got %v", len(groups)) } if groups[0].Gid != 0 || groups[0].Name != "root" || len(groups[0].List) != 1 { t.Fatalf("Expected groups[0] to be 0 - root - 1 member, got %v - %v - %v", groups[0].Gid, groups[0].Name, len(groups[0].List)) } if groups[1].Gid != 4 || groups[1].Name != "adm" || len(groups[1].List) != 3 { t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - %v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List)) } } func TestValidGetExecUser(t *testing.T) { const passwdContent = ` root:x:0:0:root user:/root:/bin/bash adm:x:42:43:adm:/var/adm:/bin/false 111:x:222:333::/var/garbage odd:x:111:112::/home/odd::::: this is just some garbage data ` const groupContent = ` root:x:0:root adm:x:43: grp:x:1234:root,adm 444:x:555:111 odd:x:444: this is just some garbage data ` defaultExecUser := ExecUser{ Uid: 8888, Gid: 8888, Sgids: []int{8888}, Home: "/8888", } tests := []struct { ref string expected ExecUser }{ { ref: "root", expected: ExecUser{ Uid: 0, Gid: 0, Sgids: []int{0, 1234}, Home: "/root", }, }, { ref: "adm", expected: ExecUser{ Uid: 42, Gid: 43, Sgids: []int{1234}, Home: "/var/adm", }, }, { ref: "root:adm", expected: ExecUser{ Uid: 0, Gid: 43, Sgids: defaultExecUser.Sgids, Home: "/root", }, }, { ref: "adm:1234", expected: ExecUser{ Uid: 42, Gid: 1234, Sgids: defaultExecUser.Sgids, Home: "/var/adm", }, }, { ref: "42:1234", expected: ExecUser{ Uid: 42, Gid: 1234, Sgids: defaultExecUser.Sgids, Home: 
"/var/adm", }, }, { ref: "1337:1234", expected: ExecUser{ Uid: 1337, Gid: 1234, Sgids: defaultExecUser.Sgids, Home: defaultExecUser.Home, }, }, { ref: "1337", expected: ExecUser{ Uid: 1337, Gid: defaultExecUser.Gid, Sgids: defaultExecUser.Sgids, Home: defaultExecUser.Home, }, }, { ref: "", expected: ExecUser{ Uid: defaultExecUser.Uid, Gid: defaultExecUser.Gid, Sgids: defaultExecUser.Sgids, Home: defaultExecUser.Home, }, }, // Regression tests for #695. { ref: "111", expected: ExecUser{ Uid: 111, Gid: 112, Sgids: defaultExecUser.Sgids, Home: "/home/odd", }, }, { ref: "111:444", expected: ExecUser{ Uid: 111, Gid: 444, Sgids: defaultExecUser.Sgids, Home: "/home/odd", }, }, } for _, test := range tests { passwd := strings.NewReader(passwdContent) group := strings.NewReader(groupContent) execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group) if err != nil { t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error()) t.Fail() continue } if !reflect.DeepEqual(test.expected, *execUser) { t.Logf("ref: %v", test.ref) t.Logf("got: %#v", execUser) t.Logf("expected: %#v", test.expected) t.Fail() continue } } } func TestInvalidGetExecUser(t *testing.T) { const passwdContent = ` root:x:0:0:root user:/root:/bin/bash adm:x:42:43:adm:/var/adm:/bin/false -42:x:12:13:broken:/very/broken this is just some garbage data ` const groupContent = ` root:x:0:root adm:x:43: grp:x:1234:root,adm this is just some garbage data ` tests := []string{ // No such user/group. "notuser", "notuser:notgroup", "root:notgroup", "notuser:adm", "8888:notgroup", "notuser:8888", // Invalid user/group values. 
"-1:0", "0:-3", "-5:-2", "-42", "-43", } for _, test := range tests { passwd := strings.NewReader(passwdContent) group := strings.NewReader(groupContent) execUser, err := GetExecUser(test, nil, passwd, group) if err == nil { t.Logf("got unexpected success when parsing '%s': %#v", test, execUser) t.Fail() continue } } } func TestGetExecUserNilSources(t *testing.T) { const passwdContent = ` root:x:0:0:root user:/root:/bin/bash adm:x:42:43:adm:/var/adm:/bin/false this is just some garbage data ` const groupContent = ` root:x:0:root adm:x:43: grp:x:1234:root,adm this is just some garbage data ` defaultExecUser := ExecUser{ Uid: 8888, Gid: 8888, Sgids: []int{8888}, Home: "/8888", } tests := []struct { ref string passwd, group bool expected ExecUser }{ { ref: "", passwd: false, group: false, expected: ExecUser{ Uid: 8888, Gid: 8888, Sgids: []int{8888}, Home: "/8888", }, }, { ref: "root", passwd: true, group: false, expected: ExecUser{ Uid: 0, Gid: 0, Sgids: []int{8888}, Home: "/root", }, }, { ref: "0", passwd: false, group: false, expected: ExecUser{ Uid: 0, Gid: 8888, Sgids: []int{8888}, Home: "/8888", }, }, { ref: "0:0", passwd: false, group: false, expected: ExecUser{ Uid: 0, Gid: 0, Sgids: []int{8888}, Home: "/8888", }, }, } for _, test := range tests { var passwd, group io.Reader if test.passwd { passwd = strings.NewReader(passwdContent) } if test.group { group = strings.NewReader(groupContent) } execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group) if err != nil { t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error()) t.Fail() continue } if !reflect.DeepEqual(test.expected, *execUser) { t.Logf("got: %#v", execUser) t.Logf("expected: %#v", test.expected) t.Fail() continue } } } func TestGetAdditionalGroups(t *testing.T) { type foo struct { groups []string expected []int hasError bool } const groupContent = ` root:x:0:root adm:x:43: grp:x:1234:root,adm adm:x:4343:root,adm-duplicate this is just some garbage data ` tests := 
[]foo{ { // empty group groups: []string{}, expected: []int{}, }, { // single group groups: []string{"adm"}, expected: []int{43}, }, { // multiple groups groups: []string{"adm", "grp"}, expected: []int{43, 1234}, }, { // invalid group groups: []string{"adm", "grp", "not-exist"}, expected: nil, hasError: true, }, { // group with numeric id groups: []string{"43"}, expected: []int{43}, }, { // group with unknown numeric id groups: []string{"adm", "10001"}, expected: []int{43, 10001}, }, { // groups specified twice with numeric and name groups: []string{"adm", "43"}, expected: []int{43}, }, { // groups with too small id groups: []string{"-1"}, expected: nil, hasError: true, }, } if utils.GetIntSize() > 4 { tests = append(tests, foo{ // groups with too large id groups: []string{strconv.Itoa(1 << 31)}, expected: nil, hasError: true, }) } for _, test := range tests { group := strings.NewReader(groupContent) gids, err := GetAdditionalGroups(test.groups, group) if test.hasError && err == nil { t.Errorf("Parse(%#v) expects error but has none", test) continue } if !test.hasError && err != nil { t.Errorf("Parse(%#v) has error %v", test, err) continue } sort.Sort(sort.IntSlice(gids)) if !reflect.DeepEqual(gids, test.expected) { t.Errorf("Gids(%v), expect %v from groups %v", gids, test.expected, test.groups) } } } func TestGetAdditionalGroupsNumeric(t *testing.T) { tests := []struct { groups []string expected []int hasError bool }{ { // numeric groups only groups: []string{"1234", "5678"}, expected: []int{1234, 5678}, }, { // numeric and alphabetic groups: []string{"1234", "fake"}, expected: nil, hasError: true, }, } for _, test := range tests { gids, err := GetAdditionalGroups(test.groups, nil) if test.hasError && err == nil { t.Errorf("Parse(%#v) expects error but has none", test) continue } if !test.hasError && err != nil { t.Errorf("Parse(%#v) has error %v", test, err) continue } sort.Sort(sort.IntSlice(gids)) if !reflect.DeepEqual(gids, test.expected) { t.Errorf("Gids(%v), 
expect %v from groups %v", gids, test.expected, test.groups) } } } docker-runc-tags-docker-1.13.1/libcontainer/utils/000077500000000000000000000000001304443252500220255ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/utils/utils.go000066400000000000000000000067111304443252500235210ustar00rootroot00000000000000package utils import ( "crypto/rand" "encoding/hex" "encoding/json" "io" "os" "path/filepath" "strings" "syscall" "unsafe" ) const ( exitSignalOffset = 128 ) // GenerateRandomName returns a new name joined with a prefix. This size // specified is used to truncate the randomly generated value func GenerateRandomName(prefix string, size int) (string, error) { id := make([]byte, 32) if _, err := io.ReadFull(rand.Reader, id); err != nil { return "", err } if size > 64 { size = 64 } return prefix + hex.EncodeToString(id)[:size], nil } // ResolveRootfs ensures that the current working directory is // not a symlink and returns the absolute path to the rootfs func ResolveRootfs(uncleanRootfs string) (string, error) { rootfs, err := filepath.Abs(uncleanRootfs) if err != nil { return "", err } return filepath.EvalSymlinks(rootfs) } // ExitStatus returns the correct exit status for a process based on if it // was signaled or exited cleanly func ExitStatus(status syscall.WaitStatus) int { if status.Signaled() { return exitSignalOffset + int(status.Signal()) } return status.ExitStatus() } // WriteJSON writes the provided struct v to w using standard json marshaling func WriteJSON(w io.Writer, v interface{}) error { data, err := json.Marshal(v) if err != nil { return err } _, err = w.Write(data) return err } // CleanPath makes a path safe for use with filepath.Join. This is done by not // only cleaning the path, but also (if the path is relative) adding a leading // '/' and cleaning it (then removing the leading '/'). 
This ensures that a // path resulting from prepending another path will always resolve to lexically // be a subdirectory of the prefixed path. This is all done lexically, so paths // that include symlinks won't be safe as a result of using CleanPath. func CleanPath(path string) string { // Deal with empty strings nicely. if path == "" { return "" } // Ensure that all paths are cleaned (especially problematic ones like // "/../../../../../" which can cause lots of issues). path = filepath.Clean(path) // If the path isn't absolute, we need to do more processing to fix paths // such as "../../../..//some/path". We also shouldn't convert absolute // paths to relative ones. if !filepath.IsAbs(path) { path = filepath.Clean(string(os.PathSeparator) + path) // This can't fail, as (by definition) all paths are relative to root. path, _ = filepath.Rel(string(os.PathSeparator), path) } // Clean the path again for good measure. return filepath.Clean(path) } // SearchLabels searches a list of key-value pairs for the provided key and // returns the corresponding value. The pairs must be separated with '='. func SearchLabels(labels []string, query string) string { for _, l := range labels { parts := strings.SplitN(l, "=", 2) if len(parts) < 2 { continue } if parts[0] == query { return parts[1] } } return "" } // Annotations returns the bundle path and user defined annotations from the // libcontainer state. We need to remove the bundle because that is a label // added by libcontainer. 
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) { userAnnotations = make(map[string]string) for _, l := range labels { parts := strings.SplitN(l, "=", 2) if len(parts) < 2 { continue } if parts[0] == "bundle" { bundle = parts[1] } else { userAnnotations[parts[0]] = parts[1] } } return } func GetIntSize() int { return int(unsafe.Sizeof(1)) } docker-runc-tags-docker-1.13.1/libcontainer/utils/utils_test.go000066400000000000000000000063731304443252500245640ustar00rootroot00000000000000package utils import ( "bytes" "fmt" "os" "path/filepath" "syscall" "testing" ) func TestGenerateName(t *testing.T) { name, err := GenerateRandomName("veth", 5) if err != nil { t.Fatal(err) } expected := 5 + len("veth") if len(name) != expected { t.Fatalf("expected name to be %d chars but received %d", expected, len(name)) } name, err = GenerateRandomName("veth", 65) if err != nil { t.Fatal(err) } expected = 64 + len("veth") if len(name) != expected { t.Fatalf("expected name to be %d chars but received %d", expected, len(name)) } } var labelTest = []struct { labels []string query string expectedValue string }{ {[]string{"bundle=/path/to/bundle"}, "bundle", "/path/to/bundle"}, {[]string{"test=a", "test=b"}, "bundle", ""}, {[]string{"bundle=a", "test=b", "bundle=c"}, "bundle", "a"}, {[]string{"", "test=a", "bundle=b"}, "bundle", "b"}, {[]string{"test", "bundle=a"}, "bundle", "a"}, {[]string{"test=a", "bundle="}, "bundle", ""}, } func TestSearchLabels(t *testing.T) { for _, tt := range labelTest { if v := SearchLabels(tt.labels, tt.query); v != tt.expectedValue { t.Errorf("expected value '%s' for query '%s'; got '%s'", tt.expectedValue, tt.query, v) } } } func TestResolveRootfs(t *testing.T) { dir := "rootfs" os.Mkdir(dir, 0600) defer os.Remove(dir) path, err := ResolveRootfs(dir) if err != nil { t.Fatal(err) } pwd, err := os.Getwd() if err != nil { t.Fatal(err) } if path != fmt.Sprintf("%s/%s", pwd, "rootfs") { t.Errorf("expected rootfs to be abs and was 
%s", path) } } func TestResolveRootfsWithSymlink(t *testing.T) { dir := "rootfs" tmpDir, _ := filepath.EvalSymlinks(os.TempDir()) os.Symlink(tmpDir, dir) defer os.Remove(dir) path, err := ResolveRootfs(dir) if err != nil { t.Fatal(err) } if path != tmpDir { t.Errorf("expected rootfs to be the real path %s and was %s", path, os.TempDir()) } } func TestResolveRootfsWithNonExistingDir(t *testing.T) { _, err := ResolveRootfs("foo") if err == nil { t.Error("expected error to happen but received nil") } } func TestExitStatus(t *testing.T) { status := syscall.WaitStatus(0) ex := ExitStatus(status) if ex != 0 { t.Errorf("expected exit status to equal 0 and received %d", ex) } } func TestExitStatusSignaled(t *testing.T) { status := syscall.WaitStatus(2) ex := ExitStatus(status) if ex != 130 { t.Errorf("expected exit status to equal 130 and received %d", ex) } } func TestWriteJSON(t *testing.T) { person := struct { Name string Age int }{ Name: "Alice", Age: 30, } var b bytes.Buffer err := WriteJSON(&b, person) if err != nil { t.Fatal(err) } expected := `{"Name":"Alice","Age":30}` if b.String() != expected { t.Errorf("expected to write %s but was %s", expected, b.String()) } } func TestCleanPath(t *testing.T) { path := CleanPath("") if path != "" { t.Errorf("expected to receive empty string and received %s", path) } path = CleanPath("rootfs") if path != "rootfs" { t.Errorf("expected to receive 'rootfs' and received %s", path) } path = CleanPath("../../../var") if path != "var" { t.Errorf("expected to receive 'var' and received %s", path) } path = CleanPath("/../../../var") if path != "/var" { t.Errorf("expected to receive '/var' and received %s", path) } } docker-runc-tags-docker-1.13.1/libcontainer/utils/utils_unix.go000066400000000000000000000013301304443252500245540ustar00rootroot00000000000000// +build !windows package utils import ( "io/ioutil" "strconv" "syscall" ) func CloseExecFrom(minFd int) error { fdList, err := ioutil.ReadDir("/proc/self/fd") if err != nil { 
return err } for _, fi := range fdList { fd, err := strconv.Atoi(fi.Name()) if err != nil { // ignore non-numeric file names continue } if fd < minFd { // ignore descriptors lower than our specified minimum continue } // intentionally ignore errors from syscall.CloseOnExec syscall.CloseOnExec(fd) // the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall) } return nil } docker-runc-tags-docker-1.13.1/libcontainer/xattr/000077500000000000000000000000001304443252500220275ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/libcontainer/xattr/errors.go000066400000000000000000000002501304443252500236670ustar00rootroot00000000000000package xattr import ( "fmt" "runtime" ) var ErrNotSupportedPlatform = fmt.Errorf("platform and architecture is not supported %s %s", runtime.GOOS, runtime.GOARCH) docker-runc-tags-docker-1.13.1/libcontainer/xattr/xattr_linux.go000066400000000000000000000017601304443252500247430ustar00rootroot00000000000000// +build linux package xattr import ( "syscall" "github.com/opencontainers/runc/libcontainer/system" ) func XattrEnabled(path string) bool { if Setxattr(path, "user.test", "") == syscall.ENOTSUP { return false } return true } func stringsfromByte(buf []byte) (result []string) { offset := 0 for index, b := range buf { if b == 0 { result = append(result, string(buf[offset:index])) offset = index + 1 } } return } func Listxattr(path string) ([]string, error) { size, err := system.Llistxattr(path, nil) if err != nil { return nil, err } buf := make([]byte, size) read, err := system.Llistxattr(path, buf) if err != nil { return nil, err } names := stringsfromByte(buf[:read]) return names, nil } func Getxattr(path, attr string) (string, error) { value, err := system.Lgetxattr(path, attr) if err != nil { return "", err } return string(value), nil } func Setxattr(path, xattr, value string) error { return 
system.Lsetxattr(path, xattr, []byte(value), 0) } docker-runc-tags-docker-1.13.1/libcontainer/xattr/xattr_test.go000066400000000000000000000027471304443252500245710ustar00rootroot00000000000000// +build linux package xattr_test import ( "os" "testing" "github.com/opencontainers/runc/libcontainer/xattr" ) func TestXattr(t *testing.T) { tmp := "xattr_test" out, err := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE, 0) if err != nil { t.Fatal("failed") } defer os.Remove(tmp) attr := "user.test" out.Close() if !xattr.XattrEnabled(tmp) { t.Log("Disabled") t.Fatal("failed") } t.Log("Success") err = xattr.Setxattr(tmp, attr, "test") if err != nil { t.Fatal("failed") } var value string value, err = xattr.Getxattr(tmp, attr) if err != nil { t.Fatal("failed") } if value != "test" { t.Fatal("failed") } t.Log("Success") var names []string names, err = xattr.Listxattr(tmp) if err != nil { t.Fatal("failed") } var found int for _, name := range names { if name == attr { found = 1 } } // Listxattr doesn't return trusted.* and system.* namespace // attrs when run in unprevileged mode. 
if found != 1 { t.Fatal("failed") } t.Log("Success") big := "0000000000000000000000000000000000000000000000000000000000000000000008c6419ad822dfe29283fb3ac98dcc5908810cb31f4cfe690040c42c144b7492eicompslf20dxmlpgz" // Test for long xattrs larger than 128 bytes err = xattr.Setxattr(tmp, attr, big) if err != nil { t.Fatal("failed to add long value") } value, err = xattr.Getxattr(tmp, attr) if err != nil { t.Fatal("failed to get long value") } t.Log("Success") if value != big { t.Fatal("failed, value doesn't match") } t.Log("Success") } docker-runc-tags-docker-1.13.1/libcontainer/xattr/xattr_unsupported.go000066400000000000000000000004461304443252500261740ustar00rootroot00000000000000// +build !linux package xattr func Listxattr(path string) ([]string, error) { return nil, ErrNotSupportedPlatform } func Getxattr(path, attr string) (string, error) { return "", ErrNotSupportedPlatform } func Setxattr(path, xattr, value string) error { return ErrNotSupportedPlatform } docker-runc-tags-docker-1.13.1/list.go000066400000000000000000000077461304443252500175340ustar00rootroot00000000000000// +build linux package main import ( "fmt" "io/ioutil" "os" "path/filepath" "text/tabwriter" "time" "encoding/json" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/utils" "github.com/urfave/cli" ) const formatOptions = `table or json` // containerState represents the platform agnostic pieces relating to a // running container's status and state type containerState struct { // Version is the OCI version for the container Version string `json:"ociVersion"` // ID is the container ID ID string `json:"id"` // InitProcessPid is the init process id in the parent namespace InitProcessPid int `json:"pid"` // Status is the current status of the container, running, paused, ... 
Status string `json:"status"` // Bundle is the path on the filesystem to the bundle Bundle string `json:"bundle"` // Rootfs is a path to a directory containing the container's root filesystem. Rootfs string `json:"rootfs"` // Created is the unix timestamp for the creation time of the container in UTC Created time.Time `json:"created"` // Annotations is the user defined annotations added to the config. Annotations map[string]string `json:"annotations,omitempty"` } var listCommand = cli.Command{ Name: "list", Usage: "lists containers started by runc with the given root", ArgsUsage: ` Where the given root is specified via the global option "--root" (default: "/run/runc"). EXAMPLE 1: To list containers created via the default "--root": # runc list EXAMPLE 2: To list containers created using a non-default value for "--root": # runc --root value list`, Flags: []cli.Flag{ cli.StringFlag{ Name: "format, f", Value: "table", Usage: `select one of: ` + formatOptions, }, cli.BoolFlag{ Name: "quiet, q", Usage: "display only container IDs", }, }, Action: func(context *cli.Context) error { s, err := getContainers(context) if err != nil { return err } if context.Bool("quiet") { for _, item := range s { fmt.Println(item.ID) } return nil } switch context.String("format") { case "table": w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0) fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\n") for _, item := range s { fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n", item.ID, item.InitProcessPid, item.Status, item.Bundle, item.Created.Format(time.RFC3339Nano)) } if err := w.Flush(); err != nil { return err } case "json": if err := json.NewEncoder(os.Stdout).Encode(s); err != nil { return err } default: return fmt.Errorf("invalid format option") } return nil }, } func getContainers(context *cli.Context) ([]containerState, error) { factory, err := loadFactory(context) if err != nil { return nil, err } root := context.GlobalString("root") absRoot, err := filepath.Abs(root) if err != nil { return 
nil, err } list, err := ioutil.ReadDir(absRoot) if err != nil { fatal(err) } var s []containerState for _, item := range list { if item.IsDir() { container, err := factory.Load(item.Name()) if err != nil { fmt.Fprintf(os.Stderr, "load container %s: %v\n", item.Name(), err) continue } containerStatus, err := container.Status() if err != nil { fmt.Fprintf(os.Stderr, "status for %s: %v\n", item.Name(), err) continue } state, err := container.State() if err != nil { fmt.Fprintf(os.Stderr, "state for %s: %v\n", item.Name(), err) continue } pid := state.BaseState.InitProcessPid if containerStatus == libcontainer.Stopped { pid = 0 } bundle, annotations := utils.Annotations(state.Config.Labels) s = append(s, containerState{ Version: state.BaseState.Config.Version, ID: state.BaseState.ID, InitProcessPid: pid, Status: containerStatus.String(), Bundle: bundle, Rootfs: state.BaseState.Config.Rootfs, Created: state.BaseState.Created, Annotations: annotations, }) } } return s, nil } docker-runc-tags-docker-1.13.1/main.go000066400000000000000000000077771304443252500175110ustar00rootroot00000000000000package main import ( "fmt" "io" "os" "strings" "github.com/Sirupsen/logrus" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) // version will be populated by the Makefile, read from // VERSION file of the source code. var version = "" // gitCommit will be the hash that the binary was built from // and will be populated by the Makefile var gitCommit = "" const ( specConfig = "config.json" usage = `Open Container Initiative runtime runc is a command line client for running applications packaged according to the Open Container Initiative (OCI) format and is a compliant implementation of the Open Container Initiative specification. runc integrates well with existing process supervisors to provide a production container runtime environment for applications. 
It can be used with your existing process monitoring tools and the container will be spawned as a direct child of the process supervisor. Containers are configured using bundles. A bundle for a container is a directory that includes a specification file named "` + specConfig + `" and a root filesystem. The root filesystem contains the contents of the container. To start a new instance of a container: # runc run [ -b bundle ] Where "" is your name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host. Providing the bundle directory using "-b" is optional. The default value for "bundle" is the current directory.` ) func main() { app := cli.NewApp() app.Name = "runc" app.Usage = usage var v []string if version != "" { v = append(v, version) } if gitCommit != "" { v = append(v, fmt.Sprintf("commit: %s", gitCommit)) } v = append(v, fmt.Sprintf("spec: %s", specs.Version)) app.Version = strings.Join(v, "\n") app.Flags = []cli.Flag{ cli.BoolFlag{ Name: "debug", Usage: "enable debug output for logging", }, cli.StringFlag{ Name: "log", Value: "/dev/null", Usage: "set the log file path where internal debug information is written", }, cli.StringFlag{ Name: "log-format", Value: "text", Usage: "set the format used by logs ('text' (default), or 'json')", }, cli.StringFlag{ Name: "root", Value: "/run/runc", Usage: "root directory for storage of container state (this should be located in tmpfs)", }, cli.StringFlag{ Name: "criu", Value: "criu", Usage: "path to the criu binary used for checkpoint and restore", }, cli.BoolFlag{ Name: "systemd-cgroup", Usage: "enable systemd cgroup support, expects cgroupsPath to be of form \"slice:prefix:name\" for e.g. 
\"system.slice:runc:434234\"", }, } app.Commands = []cli.Command{ checkpointCommand, createCommand, deleteCommand, eventsCommand, execCommand, initCommand, killCommand, listCommand, pauseCommand, psCommand, restoreCommand, resumeCommand, runCommand, specCommand, startCommand, stateCommand, updateCommand, } app.Before = func(context *cli.Context) error { if context.GlobalBool("debug") { logrus.SetLevel(logrus.DebugLevel) } if path := context.GlobalString("log"); path != "" { f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0666) if err != nil { return err } logrus.SetOutput(f) } switch context.GlobalString("log-format") { case "text": // retain logrus's default. case "json": logrus.SetFormatter(new(logrus.JSONFormatter)) default: return fmt.Errorf("unknown log-format %q", context.GlobalString("log-format")) } return nil } // If the command returns an error, cli takes upon itself to print // the error on cli.ErrWriter and exit. // Use our own writer here to ensure the log gets sent to the right location. 
cli.ErrWriter = &FatalWriter{cli.ErrWriter} if err := app.Run(os.Args); err != nil { fatal(err) } } type FatalWriter struct { cliErrWriter io.Writer } func (f *FatalWriter) Write(p []byte) (n int, err error) { logrus.Error(string(p)) return f.cliErrWriter.Write(p) } docker-runc-tags-docker-1.13.1/main_solaris.go000066400000000000000000000007341304443252500212270ustar00rootroot00000000000000// +build solaris package main import "github.com/urfave/cli" var ( checkpointCommand cli.Command eventsCommand cli.Command restoreCommand cli.Command specCommand cli.Command killCommand cli.Command deleteCommand cli.Command execCommand cli.Command initCommand cli.Command listCommand cli.Command pauseCommand cli.Command resumeCommand cli.Command startCommand cli.Command stateCommand cli.Command ) docker-runc-tags-docker-1.13.1/main_unix.go000066400000000000000000000014441304443252500205350ustar00rootroot00000000000000// +build linux package main import ( "os" "runtime" "github.com/opencontainers/runc/libcontainer" _ "github.com/opencontainers/runc/libcontainer/nsenter" "github.com/urfave/cli" ) func init() { if len(os.Args) > 1 && os.Args[1] == "init" { runtime.GOMAXPROCS(1) runtime.LockOSThread() } } var initCommand = cli.Command{ Name: "init", Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`, Action: func(context *cli.Context) error { factory, _ := libcontainer.New("") if err := factory.StartInitialization(); err != nil { // as the error is sent back to the parent there is no need to log // or write it to stderr because the parent process will handle this os.Exit(1) } panic("libcontainer: container init failed to exec") }, } docker-runc-tags-docker-1.13.1/main_unsupported.go000066400000000000000000000003541304443252500221410ustar00rootroot00000000000000// +build !linux,!solaris package main import "github.com/urfave/cli" var ( checkpointCommand cli.Command eventsCommand cli.Command restoreCommand cli.Command specCommand cli.Command 
killCommand cli.Command ) docker-runc-tags-docker-1.13.1/man/000077500000000000000000000000001304443252500167675ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/man/README.md000066400000000000000000000003471304443252500202520ustar00rootroot00000000000000runc man pages ==================== This directory contains man pages for runc in markdown format. To generate man pages from it, use this command ./md2man-all.sh You will see man pages generated under the man8 directory. docker-runc-tags-docker-1.13.1/man/md2man-all.sh000077500000000000000000000006631304443252500212570ustar00rootroot00000000000000#!/bin/bash set -e # get into this script's directory cd "$(dirname "$(readlink -f "$BASH_SOURCE")")" [ "$1" = '-q' ] || { set -x pwd } for FILE in *.md; do base="$(basename "$FILE")" name="${base%.md}" num="${name##*.}" if [ -z "$num" -o "$name" = "$num" ]; then # skip files that aren't of the format xxxx.N.md (like README.md) continue fi mkdir -p "./man${num}" go-md2man -in "$FILE" -out "./man${num}/${name}" done docker-runc-tags-docker-1.13.1/man/runc-checkpoint.8.md000066400000000000000000000017251304443252500225600ustar00rootroot00000000000000# NAME runc checkpoint - checkpoint a running container # SYNOPSIS runc checkpoint [command options] Where "" is the name for the instance of the container to be checkpointed. # DESCRIPTION The checkpoint command saves the state of the container instance. 
# OPTIONS --image-path value path for saving criu image files --work-path value path for saving work files and logs --leave-running leave the process running after checkpointing --tcp-established allow open tcp connections --ext-unix-sk allow external unix sockets --shell-job allow shell jobs --page-server value ADDRESS:PORT of the page server --file-locks handle file locks, for safety --manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict' --empty-ns value create a namespace, but don't restore its properies docker-runc-tags-docker-1.13.1/man/runc-create.8.md000066400000000000000000000024411304443252500216700ustar00rootroot00000000000000# NAME runc create - create a container # SYNOPSIS runc create [command options] Where "" is your name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host. # DESCRIPTION The create command creates an instance of a container for a bundle. The bundle is a directory with a specification file named "config.json" and a root filesystem. The specification file includes an args parameter. The args parameter is used to specify command(s) that get run when the container is started. To change the command(s) that get executed on start, edit the args parameter of the spec. See "runc spec --help" for more explanation. # OPTIONS --bundle value, -b value path to the root of the bundle directory, defaults to the current directory --console value specify the pty slave path for use with the container --pid-file value specify the file to write the process id to --no-pivot do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk --no-new-keyring do not create a new session keyring for the container. 
This will cause the container to inherit the calling processes session key docker-runc-tags-docker-1.13.1/man/runc-delete.8.md000066400000000000000000000011621304443252500216660ustar00rootroot00000000000000# NAME runc delete - delete any resources held by one or more containers often used with detached containers # SYNOPSIS runc delete [command options] [container-id...] Where "" is the name for the instance of the container. # OPTIONS --force, -f Forcibly deletes the container if it is still running (uses SIGKILL) # EXAMPLE For example, if the container id is "ubuntu01" and runc list currently shows the status of "ubuntu01" as "stopped" the following will delete resources held for "ubuntu01" removing "ubuntu01" from the runc list of containers: # runc delete ubuntu01 docker-runc-tags-docker-1.13.1/man/runc-events.8.md000066400000000000000000000010241304443252500217250ustar00rootroot00000000000000# NAME runc events - display container events such as OOM notifications, cpu, memory, and IO usage statistics # SYNOPSIS runc events [command options] Where "" is the name for the instance of the container. # DESCRIPTION The events command displays information about the container. By default the information is displayed once every 5 seconds. # OPTIONS --interval value set the stats collection interval (default: 5s) --stats display the container's stats then exit docker-runc-tags-docker-1.13.1/man/runc-exec.8.md000066400000000000000000000027021304443252500213510ustar00rootroot00000000000000# NAME runc exec - execute new process inside the container # SYNOPSIS runc exec [command options] -- [args...] Where "" is the name for the instance of the container and "" is the command to be executed in the container. 
# EXAMPLE For example, if the container is configured to run the linux ps command the following will output a list of processes running in the container: # runc exec ps # OPTIONS --console value specify the pty slave path for use with the container --cwd value current working directory in the container --env value, -e value set environment variables --tty, -t allocate a pseudo-TTY --user value, -u value UID (format: [:]) --process value, -p value path to the process.json --detach, -d detach from the container's process --pid-file value specify the file to write the process id to --process-label value set the asm process label for the process commonly used with selinux --apparmor value set the apparmor profile for the process --no-new-privs set the no new privileges value for the process --cap value, -c value add a capability to the bounding set for the process --no-subreaper disable the use of the subreaper used to reap reparented processes docker-runc-tags-docker-1.13.1/man/runc-kill.8.md000066400000000000000000000010761304443252500213630ustar00rootroot00000000000000# NAME runc kill - kill sends the specified signal (default: SIGTERM) to the container's init process # SYNOPSIS runc kill [command options] Where "" is the name for the instance of the container and "" is the signal to be sent to the init process. # OPTIONS --all, -a send the specified signal to all processes inside the container # EXAMPLE For example, if the container id is "ubuntu01" the following will send a "KILL" signal to the init process of the "ubuntu01" container: # runc kill ubuntu01 KILL docker-runc-tags-docker-1.13.1/man/runc-list.8.md000066400000000000000000000010351304443252500213760ustar00rootroot00000000000000# NAME runc list - lists containers started by runc with the given root # SYNOPSIS runc list [command options] # EXAMPLE Where the given root is specified via the global option "--root" (default: "/run/runc"). 
To list containers created via the default "--root": # runc list To list containers created using a non-default value for "--root": # runc --root value list # OPTIONS --format value, -f value select one of: table or json (default: "table") --quiet, -q display only container IDs docker-runc-tags-docker-1.13.1/man/runc-pause.8.md000066400000000000000000000006011304443252500215360ustar00rootroot00000000000000# NAME runc pause - pause suspends all processes inside the container # SYNOPSIS runc pause [container-id...] Where "" is the name for the instance of the container to be paused. # DESCRIPTION The pause command suspends all processes in the instance of the container. Use runc list to identiy instances of containers and their current status. docker-runc-tags-docker-1.13.1/man/runc-ps.8.md000066400000000000000000000005371304443252500210530ustar00rootroot00000000000000# NAME runc ps - ps displays the processes running inside a container # SYNOPSIS runc ps [command options] [-- ps options] # OPTIONS --format value, -f value select one of: table(default) or json The default format is table. The following will output the processes of a container in json format: # runc ps -f json docker-runc-tags-docker-1.13.1/man/runc-restore.8.md000066400000000000000000000023761304443252500221170ustar00rootroot00000000000000# NAME runc restore - restore a container from a previous checkpoint # SYNOPSIS runc restore [command options] Where "" is the name for the instance of the container to be restored. # DESCRIPTION Restores the saved state of the container instance that was previously saved using the runc checkpoint command. 
# OPTIONS --image-path value path to criu image files for restoring --work-path value path for saving work files and logs --tcp-established allow open tcp connections --ext-unix-sk allow external unix sockets --shell-job allow shell jobs --file-locks handle file locks, for safety --manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict' --bundle value, -b value path to the root of the bundle directory --detach, -d detach from the container's process --pid-file value specify the file to write the process id to --no-subreaper disable the use of the subreaper used to reap reparented processes --no-pivot do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk docker-runc-tags-docker-1.13.1/man/runc-resume.8.md000066400000000000000000000006101304443252500217210ustar00rootroot00000000000000# NAME runc resume - resumes all processes that have been previously paused # SYNOPSIS runc resume [container-id...] Where "" is the name for the instance of the container to be resumed. # DESCRIPTION The resume command resumes all processes in the instance of the container. Use runc list to identiy instances of containers and their current status. docker-runc-tags-docker-1.13.1/man/runc-run.8.md000066400000000000000000000027011304443252500212300ustar00rootroot00000000000000# NAME runc run - create and run a container # SYNOPSIS runc run [command options] Where "" is your name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host. # DESCRIPTION The run command creates an instance of a container for a bundle. The bundle is a directory with a specification file named "config.json" and a root filesystem. The specification file includes an args parameter. The args parameter is used to specify command(s) that get run when the container is started. 
To change the command(s) that get executed on start, edit the args parameter of the spec. See "runc spec --help" for more explanation. # OPTIONS --bundle value, -b value path to the root of the bundle directory, defaults to the current directory --console value specify the pty slave path for use with the container --detach, -d detach from the container's process --pid-file value specify the file to write the process id to --no-subreaper disable the use of the subreaper used to reap reparented processes --no-pivot do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk --no-new-keyring do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key docker-runc-tags-docker-1.13.1/man/runc-spec.8.md000066400000000000000000000037521304443252500213650ustar00rootroot00000000000000# NAME runc spec - create a new specification file # SYNOPSIS runc spec [command options] [arguments...] # DESCRIPTION The spec command creates the new specification file named "config.json" for the bundle. The spec generated is just a starter file. Editing of the spec is required to achieve desired results. For example, the newly generated spec includes an args parameter that is initially set to call the "sh" command when the container is started. Calling "sh" may work for an ubuntu container or busybox, but will not work for containers that do not include the "sh" program. # EXAMPLE To run docker's hello-world container one needs to set the args parameter in the spec to call hello. This can be done using the sed command or a text editor. 
The following commands create a bundle for hello-world, change the default args parameter in the spec from "sh" to "/hello", then run the hello command in a new hello-world container named container1: mkdir hello cd hello docker pull hello-world docker export $(docker create hello-world) > hello-world.tar mkdir rootfs tar -C rootfs -xf hello-world.tar runc spec sed -i 's;"sh";"/hello";' config.json runc start container1 In the start command above, "container1" is the name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host. An alternative for generating a customized spec config is to use "ocitools", the sub-command "ocitools generate" has lots of options that can be used to do any customizations as you want, see [ocitools](https://github.com/opencontainers/ocitools) to get more information. When starting a container through runc, runc needs root privilege. If not already running as root, you can use sudo to give runc root privilege. For example: "sudo runc start container1" will give runc root privilege to start the container on your host. # OPTIONS --bundle value, -b value path to the root of the bundle directory docker-runc-tags-docker-1.13.1/man/runc-start.8.md000066400000000000000000000006431304443252500215640ustar00rootroot00000000000000# NAME runc start - start signals a created container to execute the user defined process # SYNOPSIS runc start [container-id...] Where "" is your name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host. # DESCRIPTIONa The start command signals the container to start the user's defined process. docker-runc-tags-docker-1.13.1/man/runc-state.8.md000066400000000000000000000004171304443252500215460ustar00rootroot00000000000000# NAME runc state - output the state of a container # SYNOPSIS runc state Where "" is your name for the instance of the container. 
# DESCRIPTION The state command outputs current state information for the instance of a container. docker-runc-tags-docker-1.13.1/man/runc-update.8.md000066400000000000000000000036311304443252500217110ustar00rootroot00000000000000# NAME runc update - update container resource constraints # SYNOPSIS runc update [command options] # DESCRIPTION The data can be read from a file or the standard input, the accepted format is as follow (unchanged values can be omitted): { "memory": { "limit": 0, "reservation": 0, "swap": 0, "kernel": 0, "kernelTCP": 0 }, "cpu": { "shares": 0, "quota": 0, "period": 0, "realtimeRuntime": 0, "realtimePeriod": 0, "cpus": "", "mems": "" }, "blockIO": { "blkioWeight": 0 } } Note: if data is to be read from a file or the standard input, all other options are ignored. # OPTIONS --resources value, -r value path to the file containing the resources to update or '-' to read from the standard input --blkio-weight value Specifies per cgroup weight, range is from 10 to 1000 (default: 0) --cpu-period value CPU CFS period to be used for hardcapping (in usecs). 0 to use system default --cpu-quota value CPU CFS hardcap limit (in usecs). Allowed cpu time in a given period --cpu-rt-period value CPU realtime period to be used for hardcapping (in usecs). 0 to use system default --cpu-rt-runtime value CPU realtime hardcap limit (in usecs). Allowed cpu time in a given period --cpu-share value CPU shares (relative weight vs. 
other containers) --cpuset-cpus value CPU(s) to use --cpuset-mems value Memory node(s) to use --kernel-memory value Kernel memory limit (in bytes) --kernel-memory-tcp value Kernel memory limit (in bytes) for tcp buffer --memory value Memory limit (in bytes) --memory-reservation value Memory reservation or soft_limit (in bytes) --memory-swap value Total memory usage (memory + swap); set '-1' to enable unlimited swap docker-runc-tags-docker-1.13.1/man/runc.8.md000066400000000000000000000060571304443252500204360ustar00rootroot00000000000000# NAME runc - Open Container Initiative runtime # SYNOPSIS runc [global options] command [command options] [arguments...] # DESCRIPTION runc is a command line client for running applications packaged according to the Open Container Initiative (OCI) format and is a compliant implementation of the Open Container Initiative specification. runc integrates well with existing process supervisors to provide a production container runtime environment for applications. It can be used with your existing process monitoring tools and the container will be spawned as a direct child of the process supervisor. Containers are configured using bundles. A bundle for a container is a directory that includes a specification file named "config.json" and a root filesystem. The root filesystem contains the contents of the container. To start a new instance of a container: # runc start [ -b bundle ] Where "" is your name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host. Providing the bundle directory using "-b" is optional. The default value for "bundle" is the current directory. 
# COMMANDS checkpoint checkpoint a running container delete delete any resources held by the container often used with detached containers events display container events such as OOM notifications, cpu, memory, IO and network stats exec execute new process inside the container init initialize the namespaces and launch the process (do not call it outside of runc) kill kill sends the specified signal (default: SIGTERM) to the container's init process list lists containers started by runc with the given root pause pause suspends all processes inside the container ps displays the processes running inside a container restore restore a container from a previous checkpoint resume resumes all processes that have been previously paused run create and run a container spec create a new specification file start executes the user defined process in a created container state output the state of a container update update container resource constraints help, h Shows a list of commands or help for one command # GLOBAL OPTIONS --debug enable debug output for logging --log value set the log file path where internal debug information is written (default: "/dev/null") --log-format value set the format used by logs ('text' (default), or 'json') (default: "text") --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc") --criu value path to the criu binary used for checkpoint and restore (default: "criu") --systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g. "system.slice:runc:434234" --help, -h show help --version, -v print the version docker-runc-tags-docker-1.13.1/pause.go000066400000000000000000000043661304443252500176710ustar00rootroot00000000000000// +build linux package main import ( "fmt" "os" "github.com/urfave/cli" ) var pauseCommand = cli.Command{ Name: "pause", Usage: "pause suspends all processes inside the container", ArgsUsage: ` [container-id...] 
Where "" is the name for the instance of the container to be paused. `, Description: `The pause command suspends all processes in the instance of the container. Use runc list to identiy instances of containers and their current status.`, Action: func(context *cli.Context) error { hasError := false if !context.Args().Present() { return fmt.Errorf("runc: \"pause\" requires a minimum of 1 argument") } factory, err := loadFactory(context) if err != nil { return err } for _, id := range context.Args() { container, err := factory.Load(id) if err != nil { fmt.Fprintf(os.Stderr, "container %s does not exist\n", id) hasError = true continue } if err := container.Pause(); err != nil { fmt.Fprintf(os.Stderr, "pause container %s : %s\n", id, err) hasError = true } } if hasError { return fmt.Errorf("one or more of container pause failed") } return nil }, } var resumeCommand = cli.Command{ Name: "resume", Usage: "resumes all processes that have been previously paused", ArgsUsage: ` [container-id...] Where "" is the name for the instance of the container to be resumed.`, Description: `The resume command resumes all processes in the instance of the container. 
Use runc list to identiy instances of containers and their current status.`, Action: func(context *cli.Context) error { hasError := false if !context.Args().Present() { return fmt.Errorf("runc: \"resume\" requires a minimum of 1 argument") } factory, err := loadFactory(context) if err != nil { return err } for _, id := range context.Args() { container, err := factory.Load(id) if err != nil { fmt.Fprintf(os.Stderr, "container %s does not exist\n", id) hasError = true continue } if err := container.Resume(); err != nil { fmt.Fprintf(os.Stderr, "resume container %s : %s\n", id, err) hasError = true } } if hasError { return fmt.Errorf("one or more of container resume failed") } return nil }, } docker-runc-tags-docker-1.13.1/ps.go000066400000000000000000000036051304443252500171710ustar00rootroot00000000000000// +build linux package main import ( "encoding/json" "fmt" "os" "os/exec" "strconv" "strings" "github.com/urfave/cli" ) var psCommand = cli.Command{ Name: "ps", Usage: "ps displays the processes running inside a container", ArgsUsage: ` [ps options]`, Flags: []cli.Flag{ cli.StringFlag{ Name: "format, f", Value: "", Usage: `select one of: ` + formatOptions, }, }, Action: func(context *cli.Context) error { container, err := getContainer(context) if err != nil { return err } pids, err := container.Processes() if err != nil { return err } if context.String("format") == "json" { if err := json.NewEncoder(os.Stdout).Encode(pids); err != nil { return err } return nil } // [1:] is to remove command name, ex: // context.Args(): [containet_id ps_arg1 ps_arg2 ...] // psArgs: [ps_arg1 ps_arg2 ...] // psArgs := context.Args()[1:] if len(psArgs) == 0 { psArgs = []string{"-ef"} } cmd := exec.Command("ps", psArgs...) 
output, err := cmd.CombinedOutput() if err != nil { return fmt.Errorf("%s: %s", err, output) } lines := strings.Split(string(output), "\n") pidIndex, err := getPidIndex(lines[0]) if err != nil { return err } fmt.Println(lines[0]) for _, line := range lines[1:] { if len(line) == 0 { continue } fields := strings.Fields(line) p, err := strconv.Atoi(fields[pidIndex]) if err != nil { return fmt.Errorf("unexpected pid '%s': %s", fields[pidIndex], err) } for _, pid := range pids { if pid == p { fmt.Println(line) break } } } return nil }, SkipArgReorder: true, } func getPidIndex(title string) (int, error) { titles := strings.Fields(title) pidIndex := -1 for i, name := range titles { if name == "PID" { return i, nil } } return pidIndex, fmt.Errorf("couldn't find PID field in ps output") } docker-runc-tags-docker-1.13.1/restore.go000066400000000000000000000117511304443252500202330ustar00rootroot00000000000000// +build linux package main import ( "os" "syscall" "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/specconv" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) var restoreCommand = cli.Command{ Name: "restore", Usage: "restore a container from a previous checkpoint", ArgsUsage: ` Where "" is the name for the instance of the container to be restored.`, Description: `Restores the saved state of the container instance that was previously saved using the runc checkpoint command.`, Flags: []cli.Flag{ cli.StringFlag{ Name: "image-path", Value: "", Usage: "path to criu image files for restoring", }, cli.StringFlag{ Name: "work-path", Value: "", Usage: "path for saving work files and logs", }, cli.BoolFlag{ Name: "tcp-established", Usage: "allow open tcp connections", }, cli.BoolFlag{ Name: "ext-unix-sk", Usage: "allow external unix sockets", }, cli.BoolFlag{ Name: "shell-job", Usage: "allow shell jobs", }, cli.BoolFlag{ Name: 
"file-locks", Usage: "handle file locks, for safety", }, cli.StringFlag{ Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'", }, cli.StringFlag{ Name: "bundle, b", Value: "", Usage: "path to the root of the bundle directory", }, cli.BoolFlag{ Name: "detach,d", Usage: "detach from the container's process", }, cli.StringFlag{ Name: "pid-file", Value: "", Usage: "specify the file to write the process id to", }, cli.BoolFlag{ Name: "no-subreaper", Usage: "disable the use of the subreaper used to reap reparented processes", }, cli.BoolFlag{ Name: "no-pivot", Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk", }, cli.StringSliceFlag{ Name: "empty-ns", Usage: "create a namespace, but don't restore its properies", }, }, Action: func(context *cli.Context) error { imagePath := context.String("image-path") id := context.Args().First() if id == "" { return errEmptyID } if imagePath == "" { imagePath = getDefaultImagePath(context) } bundle := context.String("bundle") if bundle != "" { if err := os.Chdir(bundle); err != nil { return err } } spec, err := loadSpec(specConfig) if err != nil { return err } config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{ CgroupName: id, UseSystemdCgroup: context.GlobalBool("systemd-cgroup"), NoPivotRoot: context.Bool("no-pivot"), Spec: spec, }) if err != nil { return err } status, err := restoreContainer(context, spec, config, imagePath) if err == nil { os.Exit(status) } return err }, } func restoreContainer(context *cli.Context, spec *specs.Spec, config *configs.Config, imagePath string) (int, error) { var ( rootuid = 0 rootgid = 0 id = context.Args().First() ) factory, err := loadFactory(context) if err != nil { return -1, err } container, err := factory.Load(id) if err != nil { container, err = factory.Create(id, config) if err != nil { return -1, err } } options := criuOptions(context) status, err := 
container.Status() if err != nil { logrus.Error(err) } if status == libcontainer.Running { fatalf("Container with id %s already running", id) } setManageCgroupsMode(context, options) if err := setEmptyNsMask(context, options); err != nil { return -1, err } // ensure that the container is always removed if we were the process // that created it. detach := context.Bool("detach") if !detach { defer destroy(container) } process := &libcontainer.Process{} tty, err := setupIO(process, rootuid, rootgid, "", false, detach) if err != nil { return -1, err } defer tty.Close() handler := newSignalHandler(tty, !context.Bool("no-subreaper")) if err := container.Restore(process, options); err != nil { return -1, err } if err := tty.ClosePostStart(); err != nil { return -1, err } if pidFile := context.String("pid-file"); pidFile != "" { if err := createPidFile(pidFile, process); err != nil { process.Signal(syscall.SIGKILL) process.Wait() return -1, err } } if detach { return 0, nil } return handler.forward(process) } func criuOptions(context *cli.Context) *libcontainer.CriuOpts { imagePath := getCheckpointImagePath(context) if err := os.MkdirAll(imagePath, 0655); err != nil { fatal(err) } return &libcontainer.CriuOpts{ ImagesDirectory: imagePath, WorkDirectory: context.String("work-path"), LeaveRunning: context.Bool("leave-running"), TcpEstablished: context.Bool("tcp-established"), ExternalUnixConnections: context.Bool("ext-unix-sk"), ShellJob: context.Bool("shell-job"), FileLocks: context.Bool("file-locks"), } } docker-runc-tags-docker-1.13.1/rlimit_linux.go000066400000000000000000000031631304443252500212650ustar00rootroot00000000000000package main import "fmt" const ( RLIMIT_CPU = iota // CPU time in sec RLIMIT_FSIZE // Maximum filesize RLIMIT_DATA // max data size RLIMIT_STACK // max stack size RLIMIT_CORE // max core file size RLIMIT_RSS // max resident set size RLIMIT_NPROC // max number of processes RLIMIT_NOFILE // max number of open files RLIMIT_MEMLOCK // max 
locked-in-memory address space RLIMIT_AS // address space limit RLIMIT_LOCKS // maximum file locks held RLIMIT_SIGPENDING // max number of pending signals RLIMIT_MSGQUEUE // maximum bytes in POSIX mqueues RLIMIT_NICE // max nice prio allowed to raise to RLIMIT_RTPRIO // maximum realtime priority RLIMIT_RTTIME // timeout for RT tasks in us ) var rlimitMap = map[string]int{ "RLIMIT_CPU": RLIMIT_CPU, "RLIMIT_FSIZE": RLIMIT_FSIZE, "RLIMIT_DATA": RLIMIT_DATA, "RLIMIT_STACK": RLIMIT_STACK, "RLIMIT_CORE": RLIMIT_CORE, "RLIMIT_RSS": RLIMIT_RSS, "RLIMIT_NPROC": RLIMIT_NPROC, "RLIMIT_NOFILE": RLIMIT_NOFILE, "RLIMIT_MEMLOCK": RLIMIT_MEMLOCK, "RLIMIT_AS": RLIMIT_AS, "RLIMIT_LOCKS": RLIMIT_LOCKS, "RLIMIT_SIGPENDING": RLIMIT_SIGPENDING, "RLIMIT_MSGQUEUE": RLIMIT_MSGQUEUE, "RLIMIT_NICE": RLIMIT_NICE, "RLIMIT_RTPRIO": RLIMIT_RTPRIO, "RLIMIT_RTTIME": RLIMIT_RTTIME, } func strToRlimit(key string) (int, error) { rl, ok := rlimitMap[key] if !ok { return 0, fmt.Errorf("Wrong rlimit value: %s", key) } return rl, nil } docker-runc-tags-docker-1.13.1/run.go000066400000000000000000000042601304443252500173510ustar00rootroot00000000000000// +build linux package main import ( "os" "github.com/urfave/cli" ) // default action is to start a container var runCommand = cli.Command{ Name: "run", Usage: "create and run a container", ArgsUsage: ` Where "" is your name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host.`, Description: `The run command creates an instance of a container for a bundle. The bundle is a directory with a specification file named "` + specConfig + `" and a root filesystem. The specification file includes an args parameter. The args parameter is used to specify command(s) that get run when the container is started. To change the command(s) that get executed on start, edit the args parameter of the spec. 
See "runc spec --help" for more explanation.`, Flags: []cli.Flag{ cli.StringFlag{ Name: "bundle, b", Value: "", Usage: `path to the root of the bundle directory, defaults to the current directory`, }, cli.StringFlag{ Name: "console", Value: "", Usage: "specify the pty slave path for use with the container", }, cli.BoolFlag{ Name: "detach, d", Usage: "detach from the container's process", }, cli.StringFlag{ Name: "pid-file", Value: "", Usage: "specify the file to write the process id to", }, cli.BoolFlag{ Name: "no-subreaper", Usage: "disable the use of the subreaper used to reap reparented processes", }, cli.BoolFlag{ Name: "no-pivot", Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk", }, cli.BoolFlag{ Name: "no-new-keyring", Usage: "do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key", }, }, Action: func(context *cli.Context) error { spec, err := setupSpec(context) if err != nil { return err } status, err := startContainer(context, spec, false) if err == nil { // exit with the container's exit status so any external supervisor is // notified of the exit with the correct exit status. 
os.Exit(status) } return err }, } docker-runc-tags-docker-1.13.1/script/000077500000000000000000000000001304443252500175205ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/script/.validate000066400000000000000000000017201304443252500213120ustar00rootroot00000000000000#!/bin/bash if [ -z "$VALIDATE_UPSTREAM" ]; then # this is kind of an expensive check, so let's not do this twice if we # are running more than one validate bundlescript VALIDATE_REPO='https://github.com/opencontainers/runc.git' VALIDATE_BRANCH='master' if [ "$TRAVIS" = 'true' -a "$TRAVIS_PULL_REQUEST" != 'false' ]; then VALIDATE_REPO="https://github.com/${TRAVIS_REPO_SLUG}.git" VALIDATE_BRANCH="${TRAVIS_BRANCH}" fi VALIDATE_HEAD="$(git rev-parse --verify HEAD)" git fetch -q "$VALIDATE_REPO" "refs/heads/$VALIDATE_BRANCH" VALIDATE_UPSTREAM="$(git rev-parse --verify FETCH_HEAD)" VALIDATE_COMMIT_LOG="$VALIDATE_UPSTREAM..$VALIDATE_HEAD" VALIDATE_COMMIT_DIFF="$VALIDATE_UPSTREAM...$VALIDATE_HEAD" validate_diff() { if [ "$VALIDATE_UPSTREAM" != "$VALIDATE_HEAD" ]; then git diff "$VALIDATE_COMMIT_DIFF" "$@" fi } validate_log() { if [ "$VALIDATE_UPSTREAM" != "$VALIDATE_HEAD" ]; then git log "$VALIDATE_COMMIT_LOG" "$@" fi } fi docker-runc-tags-docker-1.13.1/script/check-config.sh000077500000000000000000000141551304443252500224050ustar00rootroot00000000000000#!/usr/bin/env bash set -e # bits of this were adapted from check_config.sh in docker # see also https://github.com/docker/docker/blob/master/contrib/check-config.sh possibleConfigs=( '/proc/config.gz' "/boot/config-$(uname -r)" "/usr/src/linux-$(uname -r)/.config" '/usr/src/linux/.config' ) possibleConfigFiles=( 'config.gz' "config-$(uname -r)" '.config' ) if ! 
command -v zgrep &> /dev/null; then zgrep() { zcat "$2" | grep "$1" } fi kernelVersion="$(uname -r)" kernelMajor="${kernelVersion%%.*}" kernelMinor="${kernelVersion#$kernelMajor.}" kernelMinor="${kernelMinor%%.*}" is_set() { zgrep "CONFIG_$1=[y|m]" "$CONFIG" > /dev/null } is_set_in_kernel() { zgrep "CONFIG_$1=y" "$CONFIG" > /dev/null } is_set_as_module() { zgrep "CONFIG_$1=m" "$CONFIG" > /dev/null } color() { local codes=() if [ "$1" = 'bold' ]; then codes=( "${codes[@]}" '1' ) shift fi if [ "$#" -gt 0 ]; then local code= case "$1" in # see https://en.wikipedia.org/wiki/ANSI_escape_code#Colors black) code=30 ;; red) code=31 ;; green) code=32 ;; yellow) code=33 ;; blue) code=34 ;; magenta) code=35 ;; cyan) code=36 ;; white) code=37 ;; esac if [ "$code" ]; then codes=( "${codes[@]}" "$code" ) fi fi local IFS=';' echo -en '\033['"${codes[*]}"'m' } wrap_color() { text="$1" shift color "$@" echo -n "$text" color reset echo } wrap_good() { echo "$(wrap_color "$1" white): $(wrap_color "$2" green)" } wrap_bad() { echo "$(wrap_color "$1" bold): $(wrap_color "$2" bold red)" } wrap_warning() { wrap_color >&2 "$*" red } check_flag() { if is_set_in_kernel "$1"; then wrap_good "CONFIG_$1" 'enabled' elif is_set_as_module "$1"; then wrap_good "CONFIG_$1" 'enabled (as module)' else wrap_bad "CONFIG_$1" 'missing' fi } check_flags() { for flag in "$@"; do echo "- $(check_flag "$flag")" done } check_distro_userns() { source /etc/os-release 2>/dev/null || /bin/true if [[ "${ID}" =~ ^(centos|rhel)$ && "${VERSION_ID}" =~ ^7 ]]; then # this is a CentOS7 or RHEL7 system grep -q "user_namespace.enable=1" /proc/cmdline || { # no user namespace support enabled wrap_bad " (RHEL7/CentOS7" "User namespaces disabled; add 'user_namespace.enable=1' to boot command line)" } fi } is_config() { local config="$1" # Todo: more check [[ -f "$config" ]] && return 0 return 1 } search_config() { local target_dir="$1" [[ "$target_dir" ]] || target_dir=("${possibleConfigs[@]}") local tryConfig for tryConfig 
in "${target_dir[@]}"; do is_config "$tryConfig" && { CONFIG="$tryConfig" return } [[ -d "$tryConfig" ]] && { for tryFile in "${possibleConfigFiles[@]}"; do is_config "$tryConfig/$tryFile" && { CONFIG="$tryConfig/$tryFile" return } done } done wrap_warning "error: cannot find kernel config" wrap_warning " try running this script again, specifying the kernel config:" wrap_warning " CONFIG=/path/to/kernel/.config $0 or $0 /path/to/kernel/.config" exit 1 } CONFIG="$1" is_config "$CONFIG" || { if [[ ! "$CONFIG" ]]; then wrap_color "info: no config specified, searching for kernel config ..." white search_config elif [[ -d "$CONFIG" ]]; then wrap_color "info: input is a directory, searching for kernel config in this directory..." white search_config "$CONFIG" else wrap_warning "warning: $CONFIG seems not a kernel config, searching other paths for kernel config ..." search_config fi } wrap_color "info: reading kernel config from $CONFIG ..." white echo echo 'Generally Necessary:' echo -n '- ' cgroupSubsystemDir="$(awk '/[, ](cpu|cpuacct|cpuset|devices|freezer|memory)[, ]/ && $3 == "cgroup" { print $2 }' /proc/mounts | head -n1)" cgroupDir="$(dirname "$cgroupSubsystemDir")" if [ -d "$cgroupDir/cpu" -o -d "$cgroupDir/cpuacct" -o -d "$cgroupDir/cpuset" -o -d "$cgroupDir/devices" -o -d "$cgroupDir/freezer" -o -d "$cgroupDir/memory" ]; then echo "$(wrap_good 'cgroup hierarchy' 'properly mounted') [$cgroupDir]" else if [ "$cgroupSubsystemDir" ]; then echo "$(wrap_bad 'cgroup hierarchy' 'single mountpoint!') [$cgroupSubsystemDir]" else echo "$(wrap_bad 'cgroup hierarchy' 'nonexistent??')" fi echo " $(wrap_color '(see https://github.com/tianon/cgroupfs-mount)' yellow)" fi if [ "$(cat /sys/module/apparmor/parameters/enabled 2>/dev/null)" = 'Y' ]; then echo -n '- ' if command -v apparmor_parser &> /dev/null; then echo "$(wrap_good 'apparmor' 'enabled and tools installed')" else echo "$(wrap_bad 'apparmor' 'enabled, but apparmor_parser missing')" echo -n ' ' if command -v apt-get &> 
/dev/null; then echo "$(wrap_color '(use "apt-get install apparmor" to fix this)')" elif command -v yum &> /dev/null; then echo "$(wrap_color '(your best bet is "yum install apparmor-parser")')" else echo "$(wrap_color '(look for an "apparmor" package for your distribution)')" fi fi fi flags=( NAMESPACES {NET,PID,IPC,UTS}_NS CGROUPS CGROUP_CPUACCT CGROUP_DEVICE CGROUP_FREEZER CGROUP_SCHED CPUSETS MEMCG KEYS MACVLAN VETH BRIDGE BRIDGE_NETFILTER NF_NAT_IPV4 IP_NF_FILTER IP_NF_TARGET_MASQUERADE NETFILTER_XT_MATCH_{ADDRTYPE,CONNTRACK} NF_NAT NF_NAT_NEEDED # required for bind-mounting /dev/mqueue into containers POSIX_MQUEUE ) check_flags "${flags[@]}" echo echo 'Optional Features:' { check_flags USER_NS check_distro_userns check_flags SECCOMP check_flags CGROUP_PIDS check_flags MEMCG_SWAP MEMCG_SWAP_ENABLED if is_set MEMCG_SWAP && ! is_set MEMCG_SWAP_ENABLED; then echo " $(wrap_color '(note that cgroup swap accounting is not enabled in your kernel config, you can enable it by setting boot option "swapaccount=1")' bold black)" fi } if [ "$kernelMajor" -lt 4 ] || [ "$kernelMajor" -eq 4 -a "$kernelMinor" -le 5 ]; then check_flags MEMCG_KMEM fi if [ "$kernelMajor" -lt 3 ] || [ "$kernelMajor" -eq 3 -a "$kernelMinor" -le 18 ]; then check_flags RESOURCE_COUNTERS fi if [ "$kernelMajor" -lt 3 ] || [ "$kernelMajor" -eq 3 -a "$kernelMinor" -le 13 ]; then netprio=NETPRIO_CGROUP else netprio=CGROUP_NET_PRIO fi flags=( BLK_CGROUP BLK_DEV_THROTTLING IOSCHED_CFQ CFQ_GROUP_IOSCHED CGROUP_PERF CGROUP_HUGETLB NET_CLS_CGROUP $netprio CFS_BANDWIDTH FAIR_GROUP_SCHED RT_GROUP_SCHED ) check_flags "${flags[@]}" docker-runc-tags-docker-1.13.1/script/tmpmount000077500000000000000000000000601304443252500213250ustar00rootroot00000000000000#!/bin/bash mount -t tmpfs none /tmp exec "$@" docker-runc-tags-docker-1.13.1/script/validate-gofmt000077500000000000000000000013101304443252500223440ustar00rootroot00000000000000#!/bin/bash source "$(dirname "$BASH_SOURCE")/.validate" IFS=$'\n' files=( 
$(validate_diff --diff-filter=ACMR --name-only -- '*.go' | grep -v '^Godeps/' || true) ) unset IFS badFiles=() for f in "${files[@]}"; do # we use "git show" here to validate that what's committed is formatted if [ "$(git show "$VALIDATE_HEAD:$f" | gofmt -s -l)" ]; then badFiles+=( "$f" ) fi done if [ ${#badFiles[@]} -eq 0 ]; then echo 'Congratulations! All Go source files are properly formatted.' else { echo "These files are not properly gofmt'd:" for f in "${badFiles[@]}"; do echo " - $f" done echo echo 'Please reformat the above files using "gofmt -s -w" and commit the result.' echo } >&2 false fi docker-runc-tags-docker-1.13.1/signals.go000066400000000000000000000055671304443252500202200ustar00rootroot00000000000000// +build linux package main import ( "os" "os/signal" "syscall" "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" ) const signalBufferSize = 2048 // newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals // while still forwarding all other signals to the process. func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler { if enableSubreaper { // set us as the subreaper before registering the signal handler for the container if err := system.SetSubreaper(1); err != nil { logrus.Warn(err) } } // ensure that we have a large buffer size so that we do not miss any signals // incase we are not processing them fast enough. s := make(chan os.Signal, signalBufferSize) // handle all signals for the process. signal.Notify(s) return &signalHandler{ tty: tty, signals: s, } } // exit models a process exit status with the pid and // exit status. type exit struct { pid int status int } type signalHandler struct { signals chan os.Signal tty *tty } // forward handles the main signal event loop forwarding, resizing, or reaping depending // on the signal received. 
func (h *signalHandler) forward(process *libcontainer.Process) (int, error) { // make sure we know the pid of our main process so that we can return // after it dies. pid1, err := process.Pid() if err != nil { return -1, err } // perform the initial tty resize. h.tty.resize() for s := range h.signals { switch s { case syscall.SIGWINCH: h.tty.resize() case syscall.SIGCHLD: exits, err := h.reap() if err != nil { logrus.Error(err) } for _, e := range exits { logrus.WithFields(logrus.Fields{ "pid": e.pid, "status": e.status, }).Debug("process exited") if e.pid == pid1 { // call Wait() on the process even though we already have the exit // status because we must ensure that any of the go specific process // fun such as flushing pipes are complete before we return. process.Wait() return e.status, nil } } default: logrus.Debugf("sending signal to process %s", s) if err := syscall.Kill(pid1, s.(syscall.Signal)); err != nil { logrus.Error(err) } } } return -1, nil } // reap runs wait4 in a loop until we have finished processing any existing exits // then returns all exits to the main event loop for further processing. 
func (h *signalHandler) reap() (exits []exit, err error) { var ( ws syscall.WaitStatus rus syscall.Rusage ) for { pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, &rus) if err != nil { if err == syscall.ECHILD { return exits, nil } return nil, err } if pid <= 0 { return exits, nil } exits = append(exits, exit{ pid: pid, status: utils.ExitStatus(ws), }) } } docker-runc-tags-docker-1.13.1/spec.go000066400000000000000000000156251304443252500175060ustar00rootroot00000000000000// +build linux package main import ( "encoding/json" "fmt" "io/ioutil" "os" "runtime" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) var specCommand = cli.Command{ Name: "spec", Usage: "create a new specification file", ArgsUsage: "", Description: `The spec command creates the new specification file named "` + specConfig + `" for the bundle. The spec generated is just a starter file. Editing of the spec is required to achieve desired results. For example, the newly generated spec includes an args parameter that is initially set to call the "sh" command when the container is started. Calling "sh" may work for an ubuntu container or busybox, but will not work for containers that do not include the "sh" program. EXAMPLE: To run docker's hello-world container one needs to set the args parameter in the spec to call hello. This can be done using the sed command or a text editor. The following commands create a bundle for hello-world, change the default args parameter in the spec from "sh" to "/hello", then run the hello command in a new hello-world container named container1: mkdir hello cd hello docker pull hello-world docker export $(docker create hello-world) > hello-world.tar mkdir rootfs tar -C rootfs -xf hello-world.tar runc spec sed -i 's;"sh";"/hello";' ` + specConfig + ` runc run container1 In the run command above, "container1" is the name for the instance of the container that you are starting. 
The name you provide for the container instance must be unique on your host. An alternative for generating a customized spec config is to use "ocitools", the sub-command "ocitools generate" has lots of options that can be used to do any customizations as you want, see [ocitools](https://github.com/opencontainers/ocitools) to get more information. When starting a container through runc, runc needs root privilege. If not already running as root, you can use sudo to give runc root privilege. For example: "sudo runc start container1" will give runc root privilege to start the container on your host.`, Flags: []cli.Flag{ cli.StringFlag{ Name: "bundle, b", Value: "", Usage: "path to the root of the bundle directory", }, }, Action: func(context *cli.Context) error { spec := specs.Spec{ Version: specs.Version, Platform: specs.Platform{ OS: runtime.GOOS, Arch: runtime.GOARCH, }, Root: specs.Root{ Path: "rootfs", Readonly: true, }, Process: specs.Process{ Terminal: true, User: specs.User{}, Args: []string{ "sh", }, Env: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm", }, Cwd: "/", NoNewPrivileges: true, Capabilities: []string{ "CAP_AUDIT_WRITE", "CAP_KILL", "CAP_NET_BIND_SERVICE", }, Rlimits: []specs.Rlimit{ { Type: "RLIMIT_NOFILE", Hard: uint64(1024), Soft: uint64(1024), }, }, }, Hostname: "runc", Mounts: []specs.Mount{ { Destination: "/proc", Type: "proc", Source: "proc", Options: nil, }, { Destination: "/dev", Type: "tmpfs", Source: "tmpfs", Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, }, { Destination: "/dev/pts", Type: "devpts", Source: "devpts", Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, }, { Destination: "/dev/shm", Type: "tmpfs", Source: "shm", Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, }, { Destination: "/dev/mqueue", Type: "mqueue", Source: "mqueue", Options: []string{"nosuid", "noexec", "nodev"}, }, { Destination: 
"/sys", Type: "sysfs", Source: "sysfs", Options: []string{"nosuid", "noexec", "nodev", "ro"}, }, { Destination: "/sys/fs/cgroup", Type: "cgroup", Source: "cgroup", Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, }, }, Linux: &specs.Linux{ MaskedPaths: []string{ "/proc/kcore", "/proc/latency_stats", "/proc/timer_list", "/proc/timer_stats", "/proc/sched_debug", "/sys/firmware", }, ReadonlyPaths: []string{ "/proc/asound", "/proc/bus", "/proc/fs", "/proc/irq", "/proc/sys", "/proc/sysrq-trigger", }, Resources: &specs.Resources{ Devices: []specs.DeviceCgroup{ { Allow: false, Access: sPtr("rwm"), }, }, }, Namespaces: []specs.Namespace{ { Type: "pid", }, { Type: "network", }, { Type: "ipc", }, { Type: "uts", }, { Type: "mount", }, }, }, } checkNoFile := func(name string) error { _, err := os.Stat(name) if err == nil { return fmt.Errorf("File %s exists. Remove it first", name) } if !os.IsNotExist(err) { return err } return nil } bundle := context.String("bundle") if bundle != "" { if err := os.Chdir(bundle); err != nil { return err } } if err := checkNoFile(specConfig); err != nil { return err } data, err := json.MarshalIndent(&spec, "", "\t") if err != nil { return err } if err := ioutil.WriteFile(specConfig, data, 0666); err != nil { return err } return nil }, } func sPtr(s string) *string { return &s } func rPtr(r rune) *rune { return &r } func iPtr(i int64) *int64 { return &i } func u32Ptr(i int64) *uint32 { u := uint32(i); return &u } func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm } // loadSpec loads the specification from the provided path. 
func loadSpec(cPath string) (spec *specs.Spec, err error) { cf, err := os.Open(cPath) if err != nil { if os.IsNotExist(err) { return nil, fmt.Errorf("JSON specification file %s not found", cPath) } return nil, err } defer cf.Close() if err = json.NewDecoder(cf).Decode(&spec); err != nil { return nil, err } if err = validatePlatform(&spec.Platform); err != nil { return nil, err } return spec, validateProcessSpec(&spec.Process) } func createLibContainerRlimit(rlimit specs.Rlimit) (configs.Rlimit, error) { rl, err := strToRlimit(rlimit.Type) if err != nil { return configs.Rlimit{}, err } return configs.Rlimit{ Type: rl, Hard: uint64(rlimit.Hard), Soft: uint64(rlimit.Soft), }, nil } func validatePlatform(platform *specs.Platform) error { if platform.OS != runtime.GOOS { return fmt.Errorf("target os %s mismatch with current os %s", platform.OS, runtime.GOOS) } if platform.Arch != runtime.GOARCH { return fmt.Errorf("target arch %s mismatch with current arch %s", platform.Arch, runtime.GOARCH) } return nil } docker-runc-tags-docker-1.13.1/start.go000066400000000000000000000034201304443252500176770ustar00rootroot00000000000000package main import ( "fmt" "os" "github.com/opencontainers/runc/libcontainer" "github.com/urfave/cli" ) var startCommand = cli.Command{ Name: "start", Usage: "executes the user defined process in a created container", ArgsUsage: ` [container-id...] Where "" is your name for the instance of the container that you are starting. 
The name you provide for the container instance must be unique on your host.`, Description: `The start command executes the user defined process in a created container .`, Action: func(context *cli.Context) error { hasError := false if !context.Args().Present() { return fmt.Errorf("runc: \"start\" requires a minimum of 1 argument") } factory, err := loadFactory(context) if err != nil { return err } for _, id := range context.Args() { container, err := factory.Load(id) if err != nil { fmt.Fprintf(os.Stderr, "container %s does not exist\n", id) hasError = true continue } status, err := container.Status() if err != nil { fmt.Fprintf(os.Stderr, "status for %s: %v\n", id, err) hasError = true continue } switch status { case libcontainer.Created: if err := container.Exec(); err != nil { fmt.Fprintf(os.Stderr, "start for %s failed: %v\n", id, err) hasError = true } case libcontainer.Stopped: fmt.Fprintln(os.Stderr, "cannot start a container that has run and stopped") hasError = true case libcontainer.Running: fmt.Fprintln(os.Stderr, "cannot start an already running container") hasError = true default: fmt.Fprintf(os.Stderr, "cannot start a container in the %s state\n", status) hasError = true } } if hasError { return fmt.Errorf("one or more of container start failed") } return nil }, } docker-runc-tags-docker-1.13.1/state.go000066400000000000000000000025621304443252500176700ustar00rootroot00000000000000// +build linux package main import ( "encoding/json" "os" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/utils" "github.com/urfave/cli" ) var stateCommand = cli.Command{ Name: "state", Usage: "output the state of a container", ArgsUsage: ` Where "" is your name for the instance of the container.`, Description: `The state command outputs current state information for the instance of a container.`, Action: func(context *cli.Context) error { container, err := getContainer(context) if err != nil { return err } containerStatus, err := 
container.Status() if err != nil { return err } state, err := container.State() if err != nil { return err } pid := state.BaseState.InitProcessPid if containerStatus == libcontainer.Stopped { pid = 0 } bundle, annotations := utils.Annotations(state.Config.Labels) cs := containerState{ Version: state.BaseState.Config.Version, ID: state.BaseState.ID, InitProcessPid: pid, Status: containerStatus.String(), Bundle: bundle, Rootfs: state.BaseState.Config.Rootfs, Created: state.BaseState.Created, Annotations: annotations, } data, err := json.MarshalIndent(cs, "", " ") if err != nil { return err } os.Stdout.Write(data) return nil }, } docker-runc-tags-docker-1.13.1/tests/000077500000000000000000000000001304443252500173565ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/tests/integration/000077500000000000000000000000001304443252500217015ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/tests/integration/README.md000066400000000000000000000041661304443252500231670ustar00rootroot00000000000000# runc Integration Tests Integration tests provide end-to-end testing of runc. Note that integration tests do **not** replace unit tests. As a rule of thumb, code should be tested thoroughly with unit tests. Integration tests on the other hand are meant to test a specific feature end to end. Integration tests are written in *bash* using the [bats](https://github.com/sstephenson/bats) framework. 
## Running integration tests The easiest way to run integration tests is with Docker: ``` $ make integration ``` Alternatively, you can run integration tests directly on your host through make: ``` $ sudo make localintegration ``` Or you can just run them directly using bats ``` $ sudo bats tests/integration ``` To run a single test bucket: ``` $ make integration TESTFLAGS="/checkpoint.bats" ``` To run them on your host, you will need to setup a development environment plus [bats](https://github.com/sstephenson/bats#installing-bats-from-source) For example: ``` $ cd ~/go/src/github.com $ git clone https://github.com/sstephenson/bats.git $ cd bats $ ./install.sh /usr/local ``` > **Note**: There are known issues running the integration tests using > **devicemapper** as a storage driver, make sure that your docker daemon > is using **aufs** if you want to successfully run the integration tests. ## Writing integration tests [helper functions] (https://github.com/opencontainers/runc/blob/master/test/integration/helpers.bash) are provided in order to facilitate writing tests. ```sh #!/usr/bin/env bats # This will load the helpers. load helpers # setup is called at the beginning of every test. function setup() { # see functions teardown_hello and setup_hello in helpers.bash, used to # create a pristine environment for running your tests teardown_hello setup_hello } # teardown is called at the end of every test. function teardown() { teardown_hello } @test "this is a simple test" { runc run containerid # "The runc macro" automatically populates $status, $output and $lines. # Please refer to bats documentation to find out more. 
[ "$status" -eq 0 ] # check expected output [[ "${output}" == *"Hello"* ]] } ``` docker-runc-tags-docker-1.13.1/tests/integration/cgroups.bats000066400000000000000000000043331304443252500242410ustar00rootroot00000000000000#!/usr/bin/env bats load helpers TEST_CGROUP_NAME="runc-cgroups-integration-test" CGROUP_MEMORY="${CGROUP_BASE_PATH}/${TEST_CGROUP_NAME}" function teardown() { rm -f $BATS_TMPDIR/runc-update-integration-test.json teardown_running_container test_cgroups_kmem teardown_busybox } function setup() { teardown setup_busybox } function check_cgroup_value() { cgroup=$1 source=$2 expected=$3 current=$(cat $cgroup/$source) echo $cgroup/$source echo "current" $current "!?" "$expected" [ "$current" -eq "$expected" ] } @test "runc update --kernel-memory (initialized)" { requires cgroups_kmem # Add cgroup path sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json # Set some initial known values DATA=$(cat <<-EOF "memory": { "kernel": 16777216 }, EOF ) DATA=$(echo ${DATA} | sed 's/\n/\\n/g') sed -i "s/\(\"resources\": {\)/\1\n${DATA}/" ${BUSYBOX_BUNDLE}/config.json # run a detached busybox to work with runc run -d --console /dev/pts/ptmx test_cgroups_kmem [ "$status" -eq 0 ] wait_for_container 15 1 test_cgroups_kmem # update kernel memory limit runc update test_cgroups_kmem --kernel-memory 50331648 [ "$status" -eq 0 ] # check the value check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 50331648 } @test "runc update --kernel-memory (uninitialized)" { requires cgroups_kmem # Add cgroup path sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json # run a detached busybox to work with runc run -d --console /dev/pts/ptmx test_cgroups_kmem [ "$status" -eq 0 ] wait_for_container 15 1 test_cgroups_kmem # update kernel memory limit runc update test_cgroups_kmem --kernel-memory 50331648 # Since kernel 4.6, we can update kernel memory without 
initialization # because it's accounted by default. if [ "$KERNEL_MAJOR" -lt 4 ] || [ "$KERNEL_MAJOR" -eq 4 -a "$KERNEL_MINOR" -le 5 ]; then [ ! "$status" -eq 0 ] else [ "$status" -eq 0 ] check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 50331648 fi } docker-runc-tags-docker-1.13.1/tests/integration/checkpoint.bats000066400000000000000000000025261304443252500247100ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "checkpoint and restore" { requires criu # criu does not work with external terminals so.. # setting terminal and root:readonly: to false sed -i 's;"terminal": true;"terminal": false;' config.json sed -i 's;"readonly": true;"readonly": false;' config.json sed -i 's/"sh"/"sh","-c","while :; do date; sleep 1; done"/' config.json ( # run busybox (not detached) runc run test_busybox [ "$status" -eq 0 ] ) & # check state wait_for_container 15 1 test_busybox runc state test_busybox [ "$status" -eq 0 ] [[ "${output}" == *"running"* ]] # checkpoint the running container runc --criu "$CRIU" checkpoint test_busybox # if you are having problems getting criu to work uncomment the following dump: #cat /run/opencontainer/containers/test_busybox/criu.work/dump.log [ "$status" -eq 0 ] # after checkpoint busybox is no longer running runc state test_busybox [ "$status" -ne 0 ] # restore from checkpoint ( runc --criu "$CRIU" restore test_busybox [ "$status" -eq 0 ] ) & # check state wait_for_container 15 1 test_busybox # busybox should be back up and running runc state test_busybox [ "$status" -eq 0 ] [[ "${output}" == *"running"* ]] } docker-runc-tags-docker-1.13.1/tests/integration/create.bats000066400000000000000000000021311304443252500240140ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "runc create" { runc create --console /dev/pts/ptmx 
test_busybox [ "$status" -eq 0 ] testcontainer test_busybox created # start the command runc start test_busybox [ "$status" -eq 0 ] testcontainer test_busybox running } @test "runc create exec" { runc create --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] testcontainer test_busybox created runc exec test_busybox true [ "$status" -eq 0 ] # start the command runc start test_busybox [ "$status" -eq 0 ] testcontainer test_busybox running } @test "runc create --pid-file" { runc create --pid-file pid.txt --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] testcontainer test_busybox created # check pid.txt was generated [ -e pid.txt ] run cat pid.txt [ "$status" -eq 0 ] [[ ${lines[0]} == $(__runc state test_busybox | jq '.pid') ]] # start the command runc start test_busybox [ "$status" -eq 0 ] testcontainer test_busybox running } docker-runc-tags-docker-1.13.1/tests/integration/debug.bats000066400000000000000000000027361304443252500236520ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_hello setup_hello } function teardown() { teardown_hello } @test "global --debug" { # run hello-world runc --debug run test_hello echo "${output}" [ "$status" -eq 0 ] } @test "global --debug to --log" { # run hello-world runc --log log.out --debug run test_hello [ "$status" -eq 0 ] # check output does not include debug info [[ "${output}" != *"level=debug"* ]] # check log.out was generated [ -e log.out ] # check expected debug output was sent to log.out run cat log.out [ "$status" -eq 0 ] [[ "${output}" == *"level=debug"* ]] } @test "global --debug to --log --log-format 'text'" { # run hello-world runc --log log.out --log-format "text" --debug run test_hello [ "$status" -eq 0 ] # check output does not include debug info [[ "${output}" != *"level=debug"* ]] # check log.out was generated [ -e log.out ] # check expected debug output was sent to log.out run cat log.out [ "$status" -eq 0 ] [[ "${output}" == *"level=debug"* ]] } @test "global 
--debug to --log --log-format 'json'" { # run hello-world runc --log log.out --log-format "json" --debug run test_hello [ "$status" -eq 0 ] # check output does not include debug info [[ "${output}" != *"level=debug"* ]] # check log.out was generated [ -e log.out ] # check expected debug output was sent to log.out run cat log.out [ "$status" -eq 0 ] [[ "${output}" == *'"level":"debug"'* ]] } docker-runc-tags-docker-1.13.1/tests/integration/delete.bats000066400000000000000000000044561304443252500240270ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "runc delete" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running runc kill test_busybox KILL # wait for busybox to be in the destroyed state retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" # delete test_busybox runc delete test_busybox runc state test_busybox [ "$status" -ne 0 ] } @test "runc delete --force" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running # force delete test_busybox runc delete --force test_busybox runc state test_busybox [ "$status" -ne 0 ] } @test "run delete with multi-containers" { # create busybox1 detached runc create --console /dev/pts/ptmx test_busybox1 [ "$status" -eq 0 ] testcontainer test_busybox1 created # run busybox2 detached runc run -d --console /dev/pts/ptmx test_busybox2 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox2 testcontainer test_busybox2 running # delete both test_busybox1 and test_busybox2 container runc delete test_busybox1 test_busybox2 runc state test_busybox1 [ "$status" -ne 0 ] runc state test_busybox2 [ "$status" -eq 0 ] runc kill test_busybox2 KILL # wait for busybox2 to be in the 
destroyed state retry 10 1 eval "__runc state test_busybox2 | grep -q 'stopped'" # delete test_busybox2 runc delete test_busybox2 runc state test_busybox2 [ "$status" -ne 0 ] } @test "run delete --force with multi-containers" { # create busybox1 detached runc create --console /dev/pts/ptmx test_busybox1 [ "$status" -eq 0 ] testcontainer test_busybox1 created # run busybox2 detached runc run -d --console /dev/pts/ptmx test_busybox2 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox2 testcontainer test_busybox2 running # delete both test_busybox1 and test_busybox2 container runc delete --force test_busybox1 test_busybox2 runc state test_busybox1 [ "$status" -ne 0 ] runc state test_busybox2 [ "$status" -ne 0 ] } docker-runc-tags-docker-1.13.1/tests/integration/events.bats000066400000000000000000000061031304443252500240600ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "events --stats" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox # generate stats runc events --stats test_busybox [ "$status" -eq 0 ] [[ "${lines[0]}" == [\{]"\"type\""[:]"\"stats\""[,]"\"id\""[:]"\"test_busybox\""[,]* ]] [[ "${lines[0]}" == *"data"* ]] } @test "events --interval default " { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox # spawn two sub processes (shells) # the first sub process is an event logger that sends stats events to events.log # the second sub process waits for an event that incudes test_busybox then # kills the test_busybox container which causes the event logger to exit (__runc events test_busybox > events.log) & ( retry 10 1 eval "grep -q 'test_busybox' events.log" teardown_running_container test_busybox ) & wait # wait for the above sub shells to finish [ -e events.log 
] run cat events.log [ "$status" -eq 0 ] [[ "${lines[0]}" == [\{]"\"type\""[:]"\"stats\""[,]"\"id\""[:]"\"test_busybox\""[,]* ]] [[ "${lines[0]}" == *"data"* ]] } @test "events --interval 1s " { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox # spawn two sub processes (shells) # the first sub process is an event logger that sends stats events to events.log once a second # the second sub process tries 3 times for an event that incudes test_busybox # pausing 1s between each attempt then kills the test_busybox container which # causes the event logger to exit (__runc events --interval 1s test_busybox > events.log) & ( retry 3 1 eval "grep -q 'test_busybox' events.log" teardown_running_container test_busybox ) & wait # wait for the above sub shells to finish [ -e events.log ] run eval "grep -q 'test_busybox' events.log" [ "$status" -eq 0 ] } @test "events --interval 100ms " { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox #prove there is no carry over of events.log from a prior test [ ! 
-e events.log ] # spawn two sub processes (shells) # the first sub process is an event logger that sends stats events to events.log once every 100ms # the second sub process tries 3 times for an event that incudes test_busybox # pausing 100s between each attempt then kills the test_busybox container which # causes the event logger to exit (__runc events --interval 100ms test_busybox > events.log) & ( retry 3 0.100 eval "grep -q 'test_busybox' events.log" teardown_running_container test_busybox ) & wait # wait for the above sub shells to finish [ -e events.log ] run eval "grep -q 'test_busybox' events.log" [ "$status" -eq 0 ] } docker-runc-tags-docker-1.13.1/tests/integration/exec.bats000066400000000000000000000041201304443252500234750ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "runc exec" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox runc exec test_busybox echo Hello from exec [ "$status" -eq 0 ] echo text echoed = "'""${output}""'" [[ "${output}" == *"Hello from exec"* ]] } @test "runc exec --pid-file" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox runc exec --pid-file pid.txt test_busybox echo Hello from exec [ "$status" -eq 0 ] echo text echoed = "'""${output}""'" [[ "${output}" == *"Hello from exec"* ]] # check pid.txt was generated [ -e pid.txt ] run cat pid.txt [ "$status" -eq 0 ] [[ ${lines[0]} =~ [0-9]+ ]] [[ ${lines[0]} != $(__runc state test_busybox | jq '.pid') ]] } @test "runc exec ls -la" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox runc exec test_busybox ls -la [ "$status" -eq 0 ] [[ ${lines[0]} == *"total"* ]] [[ ${lines[1]} == *"."* ]] [[ ${lines[2]} == *".."* ]] } @test "runc exec ls 
-la with --cwd" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox runc exec --cwd /bin test_busybox pwd [ "$status" -eq 0 ] [[ ${output} == "/bin" ]] } @test "runc exec --env" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox runc exec --env RUNC_EXEC_TEST=true test_busybox env [ "$status" -eq 0 ] [[ ${output} == *"RUNC_EXEC_TEST=true"* ]] } @test "runc exec --user" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox runc exec --user 1000:1000 test_busybox id [ "$status" -eq 0 ] [[ ${output} == "uid=1000 gid=1000" ]] } docker-runc-tags-docker-1.13.1/tests/integration/help.bats000066400000000000000000000030711304443252500235050ustar00rootroot00000000000000#!/usr/bin/env bats load helpers @test "runc -h" { runc -h [ "$status" -eq 0 ] [[ ${lines[0]} =~ NAME:+ ]] [[ ${lines[1]} =~ runc\ '-'\ Open\ Container\ Initiative\ runtime+ ]] runc --help [ "$status" -eq 0 ] [[ ${lines[0]} =~ NAME:+ ]] [[ ${lines[1]} =~ runc\ '-'\ Open\ Container\ Initiative\ runtime+ ]] } @test "runc command -h" { runc checkpoint -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ checkpoint+ ]] runc delete -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ delete+ ]] runc events -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ events+ ]] runc exec -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ exec+ ]] runc kill -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ kill+ ]] runc list -h [ "$status" -eq 0 ] [[ ${lines[0]} =~ NAME:+ ]] [[ ${lines[1]} =~ runc\ list+ ]] runc list --help [ "$status" -eq 0 ] [[ ${lines[0]} =~ NAME:+ ]] [[ ${lines[1]} =~ runc\ list+ ]] runc pause -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ pause+ ]] runc restore -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ restore+ ]] runc resume -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ resume+ ]] runc 
spec -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ spec+ ]] runc start -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ start+ ]] runc run -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ run+ ]] runc state -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ state+ ]] } @test "runc foo -h" { runc foo -h [ "$status" -ne 0 ] [[ "${output}" == *"No help topic for 'foo'"* ]] } docker-runc-tags-docker-1.13.1/tests/integration/helpers.bash000066400000000000000000000104531304443252500242050ustar00rootroot00000000000000#!/bin/bash # Root directory of integration tests. INTEGRATION_ROOT=$(dirname "$(readlink -f "$BASH_SOURCE")") RUNC="${INTEGRATION_ROOT}/../../runc" GOPATH="${INTEGRATION_ROOT}/../../../.." # Test data path. TESTDATA="${INTEGRATION_ROOT}/testdata" # Busybox image BUSYBOX_IMAGE="$BATS_TMPDIR/busybox.tar" BUSYBOX_BUNDLE="$BATS_TMPDIR/busyboxtest" # hello-world in tar format HELLO_IMAGE="$TESTDATA/hello-world.tar" HELLO_BUNDLE="$BATS_TMPDIR/hello-world" # CRIU PATH CRIU="/usr/local/sbin/criu" # Kernel version KERNEL_VERSION="$(uname -r)" KERNEL_MAJOR="${KERNEL_VERSION%%.*}" KERNEL_MINOR="${KERNEL_VERSION#$KERNEL_MAJOR.}" KERNEL_MINOR="${KERNEL_MINOR%%.*}" # Root state path. ROOT="$BATS_TMPDIR/runc" # Cgroup mount CGROUP_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\/ { print $5; exit }') # CONFIG_MEMCG_KMEM support KMEM="${CGROUP_BASE_PATH}/memory.kmem.limit_in_bytes" # Wrapper for runc. function runc() { run __runc "$@" # Some debug information to make life easier. bats will only print it if the # test failed, in which case the output is useful. echo "runc $@ (status=$status):" >&2 echo "$output" >&2 } # Raw wrapper for runc. function __runc() { "$RUNC" --root "$ROOT" "$@" } # Fails the current test, providing the error given. function fail() { echo "$@" >&2 exit 1 } # Allows a test to specify what things it requires. If the environment can't # support it, the test is skipped with a message. 
function requires() { for var in "$@"; do case $var in criu) if [ ! -e "$CRIU" ]; then skip "Test requires ${var}." fi ;; cgroups_kmem) if [ ! -e "$KMEM" ]; then skip "Test requires ${var}." fi ;; *) fail "BUG: Invalid requires ${var}." ;; esac done } # Retry a command $1 times until it succeeds. Wait $2 seconds between retries. function retry() { local attempts=$1 shift local delay=$1 shift local i for ((i=0; i < attempts; i++)); do run "$@" if [[ "$status" -eq 0 ]] ; then return 0 fi sleep $delay done echo "Command \"$@\" failed $attempts times. Output: $output" false } # retry until the given container has state function wait_for_container() { local attempts=$1 local delay=$2 local cid=$3 local i for ((i=0; i < attempts; i++)); do runc state $cid if [[ "$status" -eq 0 ]] ; then return 0 fi sleep $delay done echo "runc state failed to return state $statecheck $attempts times. Output: $output" false } # retry until the given container has state function wait_for_container_inroot() { local attempts=$1 local delay=$2 local cid=$3 local i for ((i=0; i < attempts; i++)); do ROOT=$4 runc state $cid if [[ "$status" -eq 0 ]] ; then return 0 fi sleep $delay done echo "runc state failed to return state $statecheck $attempts times. Output: $output" false } function testcontainer() { # test state of container runc state $1 [ "$status" -eq 0 ] [[ "${output}" == *"$2"* ]] } function setup_busybox() { run mkdir "$BUSYBOX_BUNDLE" run mkdir "$BUSYBOX_BUNDLE"/rootfs if [ -e "/testdata/busybox.tar" ]; then BUSYBOX_IMAGE="/testdata/busybox.tar" fi if [ ! 
-e $BUSYBOX_IMAGE ]; then curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.11/rootfs.tar' fi tar -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE" cd "$BUSYBOX_BUNDLE" runc spec } function setup_hello() { run mkdir "$HELLO_BUNDLE" run mkdir "$HELLO_BUNDLE"/rootfs tar -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE" cd "$HELLO_BUNDLE" runc spec sed -i 's;"sh";"/hello";' config.json } function teardown_running_container() { runc list if [[ "${output}" == *"$1"* ]]; then runc kill $1 KILL retry 10 1 eval "__runc state '$1' | grep -q 'stopped'" runc delete $1 fi } function teardown_running_container_inroot() { ROOT=$2 runc list if [[ "${output}" == *"$1"* ]]; then ROOT=$2 runc kill $1 KILL retry 10 1 eval "ROOT='$2' __runc state '$1' | grep -q 'stopped'" ROOT=$2 runc delete $1 fi } function teardown_busybox() { cd "$INTEGRATION_ROOT" teardown_running_container test_busybox run rm -f -r "$BUSYBOX_BUNDLE" } function teardown_hello() { cd "$INTEGRATION_ROOT" teardown_running_container test_hello run rm -f -r "$HELLO_BUNDLE" } docker-runc-tags-docker-1.13.1/tests/integration/kill.bats000066400000000000000000000010261304443252500235060ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "kill detached busybox" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running runc kill test_busybox KILL [ "$status" -eq 0 ] retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" runc delete test_busybox [ "$status" -eq 0 ] } docker-runc-tags-docker-1.13.1/tests/integration/list.bats000066400000000000000000000052411304443252500235310ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_running_container_inroot test_box1 $HELLO_BUNDLE teardown_running_container_inroot test_box2 
$HELLO_BUNDLE teardown_running_container_inroot test_box3 $HELLO_BUNDLE teardown_busybox setup_busybox } function teardown() { teardown_running_container_inroot test_box1 $HELLO_BUNDLE teardown_running_container_inroot test_box2 $HELLO_BUNDLE teardown_running_container_inroot test_box3 $HELLO_BUNDLE teardown_busybox } @test "list" { # run a few busyboxes detached ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box1 [ "$status" -eq 0 ] wait_for_container_inroot 15 1 test_box1 $HELLO_BUNDLE ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box2 [ "$status" -eq 0 ] wait_for_container_inroot 15 1 test_box2 $HELLO_BUNDLE ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box3 [ "$status" -eq 0 ] wait_for_container_inroot 15 1 test_box3 $HELLO_BUNDLE ROOT=$HELLO_BUNDLE runc list [ "$status" -eq 0 ] [[ ${lines[0]} =~ ID\ +PID\ +STATUS\ +BUNDLE\ +CREATED+ ]] [[ "${lines[1]}" == *"test_box1"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] [[ "${lines[2]}" == *"test_box2"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] [[ "${lines[3]}" == *"test_box3"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] ROOT=$HELLO_BUNDLE runc list -q [ "$status" -eq 0 ] [[ "${lines[0]}" == "test_box1" ]] [[ "${lines[1]}" == "test_box2" ]] [[ "${lines[2]}" == "test_box3" ]] ROOT=$HELLO_BUNDLE runc list --format table [ "$status" -eq 0 ] [[ ${lines[0]} =~ ID\ +PID\ +STATUS\ +BUNDLE\ +CREATED+ ]] [[ "${lines[1]}" == *"test_box1"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] [[ "${lines[2]}" == *"test_box2"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] [[ "${lines[3]}" == *"test_box3"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] ROOT=$HELLO_BUNDLE runc list --format json [ "$status" -eq 0 ] [[ "${lines[0]}" == [\[][\{]"\"ociVersion\""[:]"\""*[0-9][\.]*[0-9][\.]*[0-9]*"\""[,]"\"id\""[:]"\"test_box1\""[,]"\"pid\""[:]*[0-9][,]"\"status\""[:]*"\"running\""[,]"\"bundle\""[:]*$BUSYBOX_BUNDLE*[,]"\"rootfs\""[:]"\""*"\""[,]"\"created\""[:]*[0-9]*[\}]* ]] [[ "${lines[0]}" == 
*[,][\{]"\"ociVersion\""[:]"\""*[0-9][\.]*[0-9][\.]*[0-9]*"\""[,]"\"id\""[:]"\"test_box2\""[,]"\"pid\""[:]*[0-9][,]"\"status\""[:]*"\"running\""[,]"\"bundle\""[:]*$BUSYBOX_BUNDLE*[,]"\"rootfs\""[:]"\""*"\""[,]"\"created\""[:]*[0-9]*[\}]* ]] [[ "${lines[0]}" == *[,][\{]"\"ociVersion\""[:]"\""*[0-9][\.]*[0-9][\.]*[0-9]*"\""[,]"\"id\""[:]"\"test_box3\""[,]"\"pid\""[:]*[0-9][,]"\"status\""[:]*"\"running\""[,]"\"bundle\""[:]*$BUSYBOX_BUNDLE*[,]"\"rootfs\""[:]"\""*"\""[,]"\"created\""[:]*[0-9]*[\}][\]] ]] } docker-runc-tags-docker-1.13.1/tests/integration/mask.bats000066400000000000000000000025701304443252500235130ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox # Create fake rootfs. mkdir rootfs/testdir echo "Forbidden information!" > rootfs/testfile # add extra masked paths sed -i 's;"maskedPaths": \[;"maskedPaths": \["/testdir","/testfile",;g' config.json } function teardown() { teardown_busybox } @test "mask paths [file]" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox runc exec test_busybox cat /testfile [ "$status" -eq 0 ] [[ "${output}" == "" ]] runc exec test_busybox rm -f /testfile [ "$status" -eq 1 ] [[ "${output}" == *"Read-only file system"* ]] runc exec test_busybox umount /testfile [ "$status" -eq 1 ] [[ "${output}" == *"Operation not permitted"* ]] } @test "mask paths [directory]" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox runc exec test_busybox ls /testdir [ "$status" -eq 0 ] [[ "${output}" == "" ]] runc exec test_busybox touch /testdir/foo [ "$status" -eq 1 ] [[ "${output}" == *"Read-only file system"* ]] runc exec test_busybox rm -rf /testdir [ "$status" -eq 1 ] [[ "${output}" == *"Read-only file system"* ]] runc exec test_busybox umount /testdir [ "$status" -eq 1 ] [[ "${output}" == *"Operation not permitted"* ]] } 
docker-runc-tags-docker-1.13.1/tests/integration/pause.bats000066400000000000000000000052331304443252500236740ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "runc pause and resume" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox # pause busybox runc pause test_busybox [ "$status" -eq 0 ] # test state of busybox is paused testcontainer test_busybox paused # resume busybox runc resume test_busybox [ "$status" -eq 0 ] # test state of busybox is back to running testcontainer test_busybox running } @test "runc pause and resume with multi-container" { # run test_busybox1 detached runc run -d --console /dev/pts/ptmx test_busybox1 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox1 # run test_busybox2 detached runc run -d --console /dev/pts/ptmx test_busybox2 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox2 # pause test_busybox1 and test_busybox2 runc pause test_busybox1 test_busybox2 [ "$status" -eq 0 ] # test state of test_busybox1 and test_busybox2 is paused testcontainer test_busybox1 paused testcontainer test_busybox2 paused # resume test_busybox1 and test_busybox2 runc resume test_busybox1 test_busybox2 [ "$status" -eq 0 ] # test state of two containers is back to running testcontainer test_busybox1 running testcontainer test_busybox2 running # delete test_busybox1 and test_busybox2 runc delete --force test_busybox1 test_busybox2 runc state test_busybox1 [ "$status" -ne 0 ] runc state test_busybox2 [ "$status" -ne 0 ] } @test "runc pause and resume with nonexist container" { # run test_busybox1 detached runc run -d --console /dev/pts/ptmx test_busybox1 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox1 # run test_busybox2 detached runc run -d --console /dev/pts/ptmx test_busybox2 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox2 # pause 
test_busybox1, test_busybox2 and nonexistant container runc pause test_busybox1 test_busybox2 nonexistant [ "$status" -ne 0 ] # test state of test_busybox1 and test_busybox2 is paused testcontainer test_busybox1 paused testcontainer test_busybox2 paused # resume test_busybox1, test_busybox2 and nonexistant container runc resume test_busybox1 test_busybox2 nonexistant [ "$status" -ne 0 ] # test state of two containers is back to running testcontainer test_busybox1 running testcontainer test_busybox2 running # delete test_busybox1 and test_busybox2 runc delete --force test_busybox1 test_busybox2 runc state test_busybox1 [ "$status" -ne 0 ] runc state test_busybox2 [ "$status" -ne 0 ] } docker-runc-tags-docker-1.13.1/tests/integration/ps.bats000066400000000000000000000022161304443252500231770ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "ps" { # start busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running runc ps test_busybox [ "$status" -eq 0 ] [[ ${lines[0]} =~ UID\ +PID\ +PPID\ +C\ +STIME\ +TTY\ +TIME\ +CMD+ ]] [[ "${lines[1]}" == *"root"*[0-9]* ]] } @test "ps -f json" { # start busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running runc ps -f json test_busybox [ "$status" -eq 0 ] [[ ${lines[0]} =~ [0-9]+ ]] } @test "ps -e -x" { # start busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running runc ps test_busybox -e -x [ "$status" -eq 0 ] [[ ${lines[0]} =~ \ +PID\ +TTY\ +STAT\ +TIME\ +COMMAND+ ]] [[ "${lines[1]}" =~ [0-9]+ ]] } 
docker-runc-tags-docker-1.13.1/tests/integration/root.bats000066400000000000000000000026041304443252500235410ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_running_container_inroot test_dotbox $HELLO_BUNDLE teardown_busybox setup_busybox } function teardown() { teardown_running_container_inroot test_dotbox $HELLO_BUNDLE teardown_busybox } @test "global --root" { # run busybox detached using $HELLO_BUNDLE for state ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_dotbox [ "$status" -eq 0 ] # run busybox detached in default root runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state of the busyboxes are only in their respective root path wait_for_container 15 1 test_busybox wait_for_container_inroot 15 1 test_dotbox $HELLO_BUNDLE runc state test_busybox [ "$status" -eq 0 ] [[ "${output}" == *"running"* ]] ROOT=$HELLO_BUNDLE runc state test_dotbox [ "$status" -eq 0 ] [[ "${output}" == *"running"* ]] ROOT=$HELLO_BUNDLE runc state test_busybox [ "$status" -ne 0 ] runc state test_dotbox [ "$status" -ne 0 ] runc kill test_busybox KILL [ "$status" -eq 0 ] retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" runc delete test_busybox [ "$status" -eq 0 ] ROOT=$HELLO_BUNDLE runc kill test_dotbox KILL [ "$status" -eq 0 ] retry 10 1 eval "ROOT='$HELLO_BUNDLE' __runc state test_dotbox | grep -q 'stopped'" ROOT=$HELLO_BUNDLE runc delete test_dotbox [ "$status" -eq 0 ] } docker-runc-tags-docker-1.13.1/tests/integration/spec.bats000066400000000000000000000051221304443252500235060ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { # initial cleanup in case a prior test exited and did not cleanup cd "$INTEGRATION_ROOT" run rm -f -r "$HELLO_BUNDLE" # setup hello-world for spec generation testing run mkdir "$HELLO_BUNDLE" run mkdir "$HELLO_BUNDLE"/rootfs run tar -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE" } function teardown() { cd "$INTEGRATION_ROOT" run rm -f -r 
"$HELLO_BUNDLE" } @test "spec generation cwd" { cd "$HELLO_BUNDLE" # note this test runs from the bundle not the integration root # test that config.json does not exist after the above partial setup [ ! -e config.json ] # test generation of spec does not return an error runc spec [ "$status" -eq 0 ] # test generation of spec created our config.json (spec) [ -e config.json ] # test existence of required args parameter in the generated config.json run bash -c "grep -A2 'args' config.json | grep 'sh'" [[ "${output}" == *"sh"* ]] # change the default args parameter from sh to hello sed -i 's;"sh";"/hello";' config.json # ensure the generated spec works by running hello-world runc run test_hello [ "$status" -eq 0 ] } @test "spec generation --bundle" { # note this test runs from the integration root not the bundle # test that config.json does not exist after the above partial setup [ ! -e "$HELLO_BUNDLE"/config.json ] # test generation of spec does not return an error runc spec --bundle "$HELLO_BUNDLE" [ "$status" -eq 0 ] # test generation of spec created our config.json (spec) [ -e "$HELLO_BUNDLE"/config.json ] # change the default args parameter from sh to hello sed -i 's;"sh";"/hello";' "$HELLO_BUNDLE"/config.json # ensure the generated spec works by running hello-world runc run --bundle "$HELLO_BUNDLE" test_hello [ "$status" -eq 0 ] } @test "spec validator" { TESTDIR=$(pwd) cd "$HELLO_BUNDLE" run git clone https://github.com/opencontainers/runtime-spec.git src/runtime-spec [ "$status" -eq 0 ] SPEC_COMMIT=$(grep runtime-spec ${TESTDIR}/../../Godeps/Godeps.json -A 4 | grep Rev | cut -d":" -f 2 | tr -d ' "') ( cd src/runtime-spec && run git reset --hard "${SPEC_COMMIT}" ) [ "$status" -eq 0 ] [ -e src/runtime-spec/schema/config-schema.json ] run bash -c "GOPATH='$GOPATH' go get github.com/xeipuuv/gojsonschema" [ "$status" -eq 0 ] GOPATH="$GOPATH" go build src/runtime-spec/schema/validate.go [ -e ./validate ] runc spec [ -e config.json ] run ./validate 
src/runtime-spec/schema/config-schema.json config.json [ "$status" -eq 0 ] [[ "${lines[0]}" == *"The document is valid"* ]] } docker-runc-tags-docker-1.13.1/tests/integration/start.bats000066400000000000000000000014061304443252500237120ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "runc start" { runc create --console /dev/pts/ptmx test_busybox1 [ "$status" -eq 0 ] testcontainer test_busybox1 created runc create --console /dev/pts/ptmx test_busybox2 [ "$status" -eq 0 ] testcontainer test_busybox2 created # start container test_busybox1 and test_busybox2 runc start test_busybox1 test_busybox2 [ "$status" -eq 0 ] testcontainer test_busybox1 running testcontainer test_busybox2 running # delete test_busybox1 and test_busybox2 runc delete --force test_busybox1 test_busybox2 runc state test_busybox1 [ "$status" -ne 0 ] runc state test_busybox2 [ "$status" -ne 0 ] } docker-runc-tags-docker-1.13.1/tests/integration/start_detached.bats000066400000000000000000000022261304443252500255340ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "runc run detached" { # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running } @test "runc run detached ({u,g}id != 0)" { # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. 
sed -i 's;"uid": 0;"uid": 1000;g' config.json sed -i 's;"gid": 0;"gid": 100;g' config.json # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running } @test "runc run detached --pid-file" { # run busybox detached runc run --pid-file pid.txt -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running # check pid.txt was generated [ -e pid.txt ] run cat pid.txt [ "$status" -eq 0 ] [[ ${lines[0]} == $(__runc state test_busybox | jq '.pid') ]] } docker-runc-tags-docker-1.13.1/tests/integration/start_hello.bats000066400000000000000000000021271304443252500250760ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_hello setup_hello } function teardown() { teardown_hello } @test "runc run" { # run hello-world runc run test_hello [ "$status" -eq 0 ] # check expected output [[ "${output}" == *"Hello"* ]] } @test "runc run ({u,g}id != 0)" { # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. sed -i 's;"uid": 0;"uid": 1000;g' config.json sed -i 's;"gid": 0;"gid": 100;g' config.json # run hello-world runc run test_hello [ "$status" -eq 0 ] # check expected output [[ "${output}" == *"Hello"* ]] } @test "runc run with rootfs set to ." { cp config.json rootfs/. 
rm config.json cd rootfs sed -i 's;"rootfs";".";' config.json # run hello-world runc run test_hello [ "$status" -eq 0 ] [[ "${output}" == *"Hello"* ]] } @test "runc run --pid-file" { # run hello-world runc run --pid-file pid.txt test_hello [ "$status" -eq 0 ] [[ "${output}" == *"Hello"* ]] # check pid.txt was generated [ -e pid.txt ] run cat pid.txt [ "$status" -eq 0 ] [[ ${lines[0]} =~ [0-9]+ ]] } docker-runc-tags-docker-1.13.1/tests/integration/state.bats000066400000000000000000000016471304443252500237040ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function setup() { teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "state" { runc state test_busybox [ "$status" -ne 0 ] # run busybox detached runc run -d --console /dev/pts/ptmx test_busybox [ "$status" -eq 0 ] # check state wait_for_container 15 1 test_busybox testcontainer test_busybox running # pause busybox runc pause test_busybox [ "$status" -eq 0 ] # test state of busybox is paused testcontainer test_busybox paused # resume busybox runc resume test_busybox [ "$status" -eq 0 ] # test state of busybox is back to running testcontainer test_busybox running runc kill test_busybox KILL # wait for busybox to be in the destroyed state retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" # delete test_busybox runc delete test_busybox runc state test_busybox [ "$status" -ne 0 ] } docker-runc-tags-docker-1.13.1/tests/integration/testdata/000077500000000000000000000000001304443252500235125ustar00rootroot00000000000000docker-runc-tags-docker-1.13.1/tests/integration/testdata/hello-world.tar000066400000000000000000000220001304443252500264440ustar00rootroot00000000000000.dockerenv0100755000000000000000000000000012672361047011227 0ustar0000000000000000.dockerinit0100755000000000000000000000000012672361047011402 0ustar0000000000000000dev/0040755000000000000000000000000012672361047010036 5ustar0000000000000000dev/console0100755000000000000000000000000012672361047011411 
0ustar0000000000000000dev/pts/0040755000000000000000000000000012672361047010644 5ustar0000000000000000dev/shm/0040755000000000000000000000000012672361047010625 5ustar0000000000000000etc/0040755000000000000000000000000012672361047010033 5ustar0000000000000000etc/hostname0100755000000000000000000000000012672361047011562 0ustar0000000000000000etc/hosts0100755000000000000000000000000012672361047011104 0ustar0000000000000000etc/mtab0120777000000000000000000000000012672361047013167 2/proc/mountsustar0000000000000000etc/resolv.conf0100755000000000000000000000000012672361047012202 0ustar0000000000000000hello0100755000000000000000000000170012536046265010305 0ustar0000000000000000ELF>x@@@8@@ÀÀ ¸¿H¾Ÿ@º!¸<¿ Hello from Docker. This message shows that your installation appears to be working correctly. To generate this message, Docker took the following steps: 1. The Docker client contacted the Docker daemon. 2. The Docker daemon pulled the "hello-world" image from the Docker Hub. 3. The Docker daemon created a new container from that image which runs the executable that produces the output you are currently reading. 4. The Docker daemon streamed that output to the Docker client, which sent it to your terminal. 
To try something more ambitious, you can run an Ubuntu container with: $ docker run -it ubuntu bash Share images, automate workflows, and more with a free Docker Hub account: https://hub.docker.com For more examples and ideas, visit: https://docs.docker.com/userguide/ proc/0040755000000000000000000000000012672361047010223 5ustar0000000000000000sys/0040755000000000000000000000000012672361047010076 5ustar0000000000000000docker-runc-tags-docker-1.13.1/tests/integration/update.bats000066400000000000000000000157341304443252500240500ustar00rootroot00000000000000#!/usr/bin/env bats load helpers function teardown() { rm -f $BATS_TMPDIR/runc-update-integration-test.json teardown_running_container test_update teardown_busybox } function setup() { teardown setup_busybox # Add cgroup path sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-update-integration-test",/' ${BUSYBOX_BUNDLE}/config.json # Set some initial known values DATA=$(cat </ { print $5; exit }') eval CGROUP_${g}="${base_path}/runc-update-integration-test" done # check that initial values were properly set check_cgroup_value $CGROUP_BLKIO "blkio.weight" 1000 check_cgroup_value $CGROUP_CPU "cpu.cfs_period_us" 1000000 check_cgroup_value $CGROUP_CPU "cpu.cfs_quota_us" 500000 check_cgroup_value $CGROUP_CPU "cpu.shares" 100 check_cgroup_value $CGROUP_CPUSET "cpuset.cpus" 0 check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 16777216 check_cgroup_value $CGROUP_MEMORY "memory.kmem.tcp.limit_in_bytes" 11534336 check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" 33554432 check_cgroup_value $CGROUP_MEMORY "memory.soft_limit_in_bytes" 25165824 # update blkio-weight runc update test_update --blkio-weight 500 [ "$status" -eq 0 ] check_cgroup_value $CGROUP_BLKIO "blkio.weight" 500 # update cpu-period runc update test_update --cpu-period 900000 [ "$status" -eq 0 ] check_cgroup_value $CGROUP_CPU "cpu.cfs_period_us" 900000 # update cpu-quota runc update test_update --cpu-quota 600000 [ "$status" -eq 0 ] 
check_cgroup_value $CGROUP_CPU "cpu.cfs_quota_us" 600000 # update cpu-shares runc update test_update --cpu-share 200 [ "$status" -eq 0 ] check_cgroup_value $CGROUP_CPU "cpu.shares" 200 # update cpuset if supported (i.e. we're running on a multicore cpu) cpu_count=$(grep '^processor' /proc/cpuinfo | wc -l) if [ $cpu_count -gt 1 ]; then runc update test_update --cpuset-cpus "1" [ "$status" -eq 0 ] check_cgroup_value $CGROUP_CPUSET "cpuset.cpus" 1 fi # update memory limit runc update test_update --memory 67108864 [ "$status" -eq 0 ] check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" 67108864 runc update test_update --memory 50M [ "$status" -eq 0 ] check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" 52428800 # update memory soft limit runc update test_update --memory-reservation 33554432 [ "$status" -eq 0 ] check_cgroup_value $CGROUP_MEMORY "memory.soft_limit_in_bytes" 33554432 # update memory swap (if available) if [ -f "$CGROUP_MEMORY/memory.memsw.limit_in_bytes" ]; then runc update test_update --memory-swap 96468992 [ "$status" -eq 0 ] check_cgroup_value $CGROUP_MEMORY "memory.memsw.limit_in_bytes" 96468992 fi # update kernel memory limit runc update test_update --kernel-memory 50331648 [ "$status" -eq 0 ] check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 50331648 # update kernel memory tcp limit runc update test_update --kernel-memory-tcp 41943040 [ "$status" -eq 0 ] check_cgroup_value $CGROUP_MEMORY "memory.kmem.tcp.limit_in_bytes" 41943040 # Revert to the test initial value via json on stding runc update -r - test_update < $BATS_TMPDIR/runc-update-integration-test.json runc update -r $BATS_TMPDIR/runc-update-integration-test.json test_update [ "$status" -eq 0 ] check_cgroup_value $CGROUP_BLKIO "blkio.weight" 1000 check_cgroup_value $CGROUP_CPU "cpu.cfs_period_us" 1000000 check_cgroup_value $CGROUP_CPU "cpu.cfs_quota_us" 500000 check_cgroup_value $CGROUP_CPU "cpu.shares" 100 check_cgroup_value $CGROUP_CPUSET "cpuset.cpus" 0 
check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 16777216 check_cgroup_value $CGROUP_MEMORY "memory.kmem.tcp.limit_in_bytes" 11534336 check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" 33554432 check_cgroup_value $CGROUP_MEMORY "memory.soft_limit_in_bytes" 25165824 } docker-runc-tags-docker-1.13.1/tests/integration/version.bats000066400000000000000000000003561304443252500242450ustar00rootroot00000000000000#!/usr/bin/env bats load helpers @test "runc version" { runc -v [ "$status" -eq 0 ] [[ ${lines[0]} =~ runc\ version\ [0-9]+\.[0-9]+\.[0-9]+ ]] [[ ${lines[1]} =~ commit:+ ]] [[ ${lines[2]} =~ spec:\ [0-9]+\.[0-9]+\.[0-9]+ ]] } docker-runc-tags-docker-1.13.1/tty.go000066400000000000000000000051661304443252500173730ustar00rootroot00000000000000// +build linux package main import ( "fmt" "io" "os" "sync" "github.com/docker/docker/pkg/term" "github.com/opencontainers/runc/libcontainer" ) // setup standard pipes so that the TTY of the calling runc process // is not inherited by the container. 
func createStdioPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, error) { i, err := p.InitializeIO(rootuid, rootgid) if err != nil { return nil, err } t := &tty{ closers: []io.Closer{ i.Stdin, i.Stdout, i.Stderr, }, } // add the process's io to the post start closers if they support close for _, cc := range []interface{}{ p.Stdin, p.Stdout, p.Stderr, } { if c, ok := cc.(io.Closer); ok { t.postStart = append(t.postStart, c) } } go func() { io.Copy(i.Stdin, os.Stdin) i.Stdin.Close() }() t.wg.Add(2) go t.copyIO(os.Stdout, i.Stdout) go t.copyIO(os.Stderr, i.Stderr) return t, nil } func (t *tty) copyIO(w io.Writer, r io.ReadCloser) { defer t.wg.Done() io.Copy(w, r) r.Close() } func createTty(p *libcontainer.Process, rootuid, rootgid int, consolePath string) (*tty, error) { if consolePath != "" { if err := p.ConsoleFromPath(consolePath); err != nil { return nil, err } return &tty{}, nil } console, err := p.NewConsole(rootuid, rootgid) if err != nil { return nil, err } go io.Copy(console, os.Stdin) go io.Copy(os.Stdout, console) state, err := term.SetRawTerminal(os.Stdin.Fd()) if err != nil { return nil, fmt.Errorf("failed to set the terminal from the stdin: %v", err) } return &tty{ console: console, state: state, closers: []io.Closer{ console, }, }, nil } type tty struct { console libcontainer.Console state *term.State closers []io.Closer postStart []io.Closer wg sync.WaitGroup } // ClosePostStart closes any fds that are provided to the container and dup2'd // so that we no longer have copy in our process. 
func (t *tty) ClosePostStart() error { for _, c := range t.postStart { c.Close() } return nil } // Close closes all open fds for the tty and/or restores the orignal // stdin state to what it was prior to the container execution func (t *tty) Close() error { // ensure that our side of the fds are always closed for _, c := range t.postStart { c.Close() } // wait for the copy routines to finish before closing the fds t.wg.Wait() for _, c := range t.closers { c.Close() } if t.state != nil { term.RestoreTerminal(os.Stdin.Fd(), t.state) } return nil } func (t *tty) resize() error { if t.console == nil { return nil } ws, err := term.GetWinsize(os.Stdin.Fd()) if err != nil { return err } return term.SetWinsize(t.console.Fd(), ws) } docker-runc-tags-docker-1.13.1/update.go000066400000000000000000000131331304443252500200260ustar00rootroot00000000000000// +build linux package main import ( "encoding/json" "fmt" "os" "strconv" "github.com/docker/go-units" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) func u64Ptr(i uint64) *uint64 { return &i } func u16Ptr(i uint16) *uint16 { return &i } var updateCommand = cli.Command{ Name: "update", Usage: "update container resource constraints", ArgsUsage: ``, Flags: []cli.Flag{ cli.StringFlag{ Name: "resources, r", Value: "", Usage: `path to the file containing the resources to update or '-' to read from the standard input The accepted format is as follow (unchanged values can be omitted): { "memory": { "limit": 0, "reservation": 0, "swap": 0, "kernel": 0, "kernelTCP": 0 }, "cpu": { "shares": 0, "quota": 0, "period": 0, "realtimeRuntime": 0, "realtimePeriod": 0, "cpus": "", "mems": "" }, "blockIO": { "blkioWeight": 0 } } Note: if data is to be read from a file or the standard input, all other options are ignored. 
`, }, cli.IntFlag{ Name: "blkio-weight", Usage: "Specifies per cgroup weight, range is from 10 to 1000", }, cli.StringFlag{ Name: "cpu-period", Usage: "CPU CFS period to be used for hardcapping (in usecs). 0 to use system default", }, cli.StringFlag{ Name: "cpu-quota", Usage: "CPU CFS hardcap limit (in usecs). Allowed cpu time in a given period", }, cli.StringFlag{ Name: "cpu-share", Usage: "CPU shares (relative weight vs. other containers)", }, cli.StringFlag{ Name: "cpu-rt-period", Usage: "CPU realtime period to be used for hardcapping (in usecs). 0 to use system default", }, cli.StringFlag{ Name: "cpu-rt-runtime", Usage: "CPU realtime hardcap limit (in usecs). Allowed cpu time in a given period", }, cli.StringFlag{ Name: "cpuset-cpus", Usage: "CPU(s) to use", }, cli.StringFlag{ Name: "cpuset-mems", Usage: "Memory node(s) to use", }, cli.StringFlag{ Name: "kernel-memory", Usage: "Kernel memory limit (in bytes)", }, cli.StringFlag{ Name: "kernel-memory-tcp", Usage: "Kernel memory limit (in bytes) for tcp buffer", }, cli.StringFlag{ Name: "memory", Usage: "Memory limit (in bytes)", }, cli.StringFlag{ Name: "memory-reservation", Usage: "Memory reservation or soft_limit (in bytes)", }, cli.StringFlag{ Name: "memory-swap", Usage: "Total memory usage (memory + swap); set '-1' to enable unlimited swap", }, }, Action: func(context *cli.Context) error { container, err := getContainer(context) if err != nil { return err } r := specs.Resources{ Memory: &specs.Memory{ Limit: u64Ptr(0), Reservation: u64Ptr(0), Swap: u64Ptr(0), Kernel: u64Ptr(0), KernelTCP: u64Ptr(0), }, CPU: &specs.CPU{ Shares: u64Ptr(0), Quota: u64Ptr(0), Period: u64Ptr(0), RealtimeRuntime: u64Ptr(0), RealtimePeriod: u64Ptr(0), Cpus: sPtr(""), Mems: sPtr(""), }, BlockIO: &specs.BlockIO{ Weight: u16Ptr(0), }, } config := container.Config() if in := context.String("resources"); in != "" { var ( f *os.File err error ) switch in { case "-": f = os.Stdin default: f, err = os.Open(in) if err != nil { return err } 
} err = json.NewDecoder(f).Decode(&r) if err != nil { return err } } else { if val := context.Int("blkio-weight"); val != 0 { r.BlockIO.Weight = u16Ptr(uint16(val)) } if val := context.String("cpuset-cpus"); val != "" { r.CPU.Cpus = &val } if val := context.String("cpuset-mems"); val != "" { r.CPU.Mems = &val } for _, pair := range []struct { opt string dest *uint64 }{ {"cpu-period", r.CPU.Period}, {"cpu-quota", r.CPU.Quota}, {"cpu-rt-period", r.CPU.RealtimePeriod}, {"cpu-rt-runtime", r.CPU.RealtimeRuntime}, {"cpu-share", r.CPU.Shares}, } { if val := context.String(pair.opt); val != "" { var err error *pair.dest, err = strconv.ParseUint(val, 10, 64) if err != nil { return fmt.Errorf("invalid value for %s: %s", pair.opt, err) } } } for _, pair := range []struct { opt string dest *uint64 }{ {"kernel-memory", r.Memory.Kernel}, {"kernel-memory-tcp", r.Memory.KernelTCP}, {"memory", r.Memory.Limit}, {"memory-reservation", r.Memory.Reservation}, {"memory-swap", r.Memory.Swap}, } { if val := context.String(pair.opt); val != "" { v, err := units.RAMInBytes(val) if err != nil { return fmt.Errorf("invalid value for %s: %s", pair.opt, err) } *pair.dest = uint64(v) } } } // Update the value config.Cgroups.Resources.BlkioWeight = *r.BlockIO.Weight config.Cgroups.Resources.CpuPeriod = int64(*r.CPU.Period) config.Cgroups.Resources.CpuQuota = int64(*r.CPU.Quota) config.Cgroups.Resources.CpuShares = int64(*r.CPU.Shares) config.Cgroups.Resources.CpuRtPeriod = int64(*r.CPU.RealtimePeriod) config.Cgroups.Resources.CpuRtRuntime = int64(*r.CPU.RealtimeRuntime) config.Cgroups.Resources.CpusetCpus = *r.CPU.Cpus config.Cgroups.Resources.CpusetMems = *r.CPU.Mems config.Cgroups.Resources.KernelMemory = int64(*r.Memory.Kernel) config.Cgroups.Resources.KernelMemoryTCP = int64(*r.Memory.KernelTCP) config.Cgroups.Resources.Memory = int64(*r.Memory.Limit) config.Cgroups.Resources.MemoryReservation = int64(*r.Memory.Reservation) config.Cgroups.Resources.MemorySwap = int64(*r.Memory.Swap) if err := 
container.Set(config); err != nil { return err } return nil }, } docker-runc-tags-docker-1.13.1/utils.go000066400000000000000000000017011304443252500177020ustar00rootroot00000000000000package main import ( "fmt" "os" "github.com/Sirupsen/logrus" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) // fatal prints the error's details if it is a libcontainer specific error type // then exits the program with an exit status of 1. func fatal(err error) { // make sure the error is written to the logger logrus.Error(err) fmt.Fprintln(os.Stderr, err) os.Exit(1) } // setupSpec performs initial setup based on the cli.Context for the container func setupSpec(context *cli.Context) (*specs.Spec, error) { bundle := context.String("bundle") if bundle != "" { if err := os.Chdir(bundle); err != nil { return nil, err } } spec, err := loadSpec(specConfig) if err != nil { return nil, err } notifySocket := os.Getenv("NOTIFY_SOCKET") if notifySocket != "" { setupSdNotify(spec, notifySocket) } if os.Geteuid() != 0 { return nil, fmt.Errorf("runc should be run as root") } return spec, nil } docker-runc-tags-docker-1.13.1/utils_linux.go000066400000000000000000000202251304443252500211230ustar00rootroot00000000000000// +build linux package main import ( "errors" "fmt" "os" "path/filepath" "strconv" "syscall" "github.com/Sirupsen/logrus" "github.com/coreos/go-systemd/activation" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/specconv" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) var errEmptyID = errors.New("container id cannot be empty") var container libcontainer.Container // loadFactory returns the configured factory instance for execing containers. 
func loadFactory(context *cli.Context) (libcontainer.Factory, error) { root := context.GlobalString("root") abs, err := filepath.Abs(root) if err != nil { return nil, err } cgroupManager := libcontainer.Cgroupfs if context.GlobalBool("systemd-cgroup") { if systemd.UseSystemd() { cgroupManager = libcontainer.SystemdCgroups } else { return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available") } } return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(context.GlobalString("criu"))) } // getContainer returns the specified container instance by loading it from state // with the default factory. func getContainer(context *cli.Context) (libcontainer.Container, error) { id := context.Args().First() if id == "" { return nil, errEmptyID } factory, err := loadFactory(context) if err != nil { return nil, err } return factory.Load(id) } func fatalf(t string, v ...interface{}) { fatal(fmt.Errorf(t, v...)) } func getDefaultImagePath(context *cli.Context) string { cwd, err := os.Getwd() if err != nil { panic(err) } return filepath.Join(cwd, "checkpoint") } // newProcess returns a new libcontainer Process with the arguments from the // spec and stdio from the current process. func newProcess(p specs.Process) (*libcontainer.Process, error) { lp := &libcontainer.Process{ Args: p.Args, Env: p.Env, // TODO: fix libcontainer's API to better support uid/gid in a typesafe way. 
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID), Cwd: p.Cwd, Capabilities: p.Capabilities, Label: p.SelinuxLabel, NoNewPrivileges: &p.NoNewPrivileges, AppArmorProfile: p.ApparmorProfile, } for _, gid := range p.User.AdditionalGids { lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10)) } for _, rlimit := range p.Rlimits { rl, err := createLibContainerRlimit(rlimit) if err != nil { return nil, err } lp.Rlimits = append(lp.Rlimits, rl) } return lp, nil } func dupStdio(process *libcontainer.Process, rootuid, rootgid int) error { process.Stdin = os.Stdin process.Stdout = os.Stdout process.Stderr = os.Stderr for _, fd := range []uintptr{ os.Stdin.Fd(), os.Stdout.Fd(), os.Stderr.Fd(), } { if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil { return err } } return nil } // If systemd is supporting sd_notify protocol, this function will add support // for sd_notify protocol from within the container. func setupSdNotify(spec *specs.Spec, notifySocket string) { spec.Mounts = append(spec.Mounts, specs.Mount{Destination: notifySocket, Type: "bind", Source: notifySocket, Options: []string{"bind"}}) spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket)) } func destroy(container libcontainer.Container) { if err := container.Destroy(); err != nil { logrus.Error(err) } } // setupIO sets the proper IO on the process depending on the configuration // If there is a nil error then there must be a non nil tty returned func setupIO(process *libcontainer.Process, rootuid, rootgid int, console string, createTTY, detach bool) (*tty, error) { // detach and createTty will not work unless a console path is passed // so error out here before changing any terminal settings if createTTY && detach && console == "" { return nil, fmt.Errorf("cannot allocate tty if runc will detach") } if createTTY { return createTty(process, rootuid, rootgid, console) } if detach { if err := dupStdio(process, rootuid, rootgid); err 
!= nil { return nil, err } return &tty{}, nil } return createStdioPipes(process, rootuid, rootgid) } // createPidFile creates a file with the processes pid inside it atomically // it creates a temp file with the paths filename + '.' infront of it // then renames the file func createPidFile(path string, process *libcontainer.Process) error { pid, err := process.Pid() if err != nil { return err } var ( tmpDir = filepath.Dir(path) tmpName = filepath.Join(tmpDir, fmt.Sprintf(".%s", filepath.Base(path))) ) f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666) if err != nil { return err } _, err = fmt.Fprintf(f, "%d", pid) f.Close() if err != nil { return err } return os.Rename(tmpName, path) } func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) { config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{ CgroupName: id, UseSystemdCgroup: context.GlobalBool("systemd-cgroup"), NoPivotRoot: context.Bool("no-pivot"), NoNewKeyring: context.Bool("no-new-keyring"), Spec: spec, }) if err != nil { return nil, err } factory, err := loadFactory(context) if err != nil { return nil, err } return factory.Create(id, config) } type runner struct { enableSubreaper bool shouldDestroy bool detach bool listenFDs []*os.File pidFile string console string container libcontainer.Container create bool } func (r *runner) run(config *specs.Process) (int, error) { process, err := newProcess(*config) if err != nil { r.destroy() return -1, err } if len(r.listenFDs) > 0 { process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1") process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...) 
} rootuid, err := r.container.Config().HostUID() if err != nil { r.destroy() return -1, err } rootgid, err := r.container.Config().HostGID() if err != nil { r.destroy() return -1, err } tty, err := setupIO(process, rootuid, rootgid, r.console, config.Terminal, r.detach || r.create) if err != nil { r.destroy() return -1, err } handler := newSignalHandler(tty, r.enableSubreaper) startFn := r.container.Start if !r.create { startFn = r.container.Run } defer tty.Close() if err := startFn(process); err != nil { r.destroy() return -1, err } if err := tty.ClosePostStart(); err != nil { r.terminate(process) r.destroy() return -1, err } if r.pidFile != "" { if err := createPidFile(r.pidFile, process); err != nil { r.terminate(process) r.destroy() return -1, err } } if r.detach || r.create { return 0, nil } status, err := handler.forward(process) if err != nil { r.terminate(process) } r.destroy() return status, err } func (r *runner) destroy() { if r.shouldDestroy { destroy(r.container) } } func (r *runner) terminate(p *libcontainer.Process) { p.Signal(syscall.SIGKILL) p.Wait() } func validateProcessSpec(spec *specs.Process) error { if spec.Cwd == "" { return fmt.Errorf("Cwd property must not be empty") } if !filepath.IsAbs(spec.Cwd) { return fmt.Errorf("Cwd must be an absolute path") } if len(spec.Args) == 0 { return fmt.Errorf("args must not be empty") } return nil } func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, error) { id := context.Args().First() if id == "" { return -1, errEmptyID } container, err := createContainer(context, id, spec) if err != nil { return -1, err } // Support on-demand socket activation by passing file descriptors into the container init process. 
listenFDs := []*os.File{} if os.Getenv("LISTEN_FDS") != "" { listenFDs = activation.Files(false) } r := &runner{ enableSubreaper: !context.Bool("no-subreaper"), shouldDestroy: true, container: container, listenFDs: listenFDs, console: context.String("console"), detach: context.Bool("detach"), pidFile: context.String("pid-file"), create: create, } return r.run(&spec.Process) }