libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/0000755000175000017500000000000012527300122020152 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/integration/0000755000175000017500000000000012524212370022501 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/integration/template_test.go0000644000175000017500000000517112524212370025706 0ustar tianontianon
package integration

import (
	"syscall"

	"github.com/docker/libcontainer/configs"
)

// standardEnvironment is the environment handed to processes started by the
// tests in this package.
var standardEnvironment = []string{
	"HOME=/root",
	"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
	"HOSTNAME=integration",
	"TERM=xterm",
}

// defaultMountFlags are the mount flags shared by most mounts in the
// template config below.
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV

// newTemplateConfig returns a base template for running a container.
//
// It uses a network strategy of just setting a loopback interface
// and the default setup for devices. The returned config is freshly
// allocated on every call, so tests may modify it freely.
func newTemplateConfig(rootfs string) *configs.Config {
	return &configs.Config{
		Rootfs: rootfs,
		Capabilities: []string{
			"CHOWN",
			"DAC_OVERRIDE",
			"FSETID",
			"FOWNER",
			"MKNOD",
			"NET_RAW",
			"SETGID",
			"SETUID",
			"SETFCAP",
			"SETPCAP",
			"NET_BIND_SERVICE",
			"SYS_CHROOT",
			"KILL",
			"AUDIT_WRITE",
		},
		// Every namespace kind except the user namespace; tests that need
		// one append configs.NEWUSER themselves.
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: configs.NEWNS},
			{Type: configs.NEWUTS},
			{Type: configs.NEWIPC},
			{Type: configs.NEWPID},
			{Type: configs.NEWNET},
		}),
		Cgroups: &configs.Cgroup{
			Name:            "test",
			Parent:          "integration",
			AllowAllDevices: false,
			AllowedDevices:  configs.DefaultAllowedDevices,
		},
		MaskPaths: []string{
			"/proc/kcore",
		},
		ReadonlyPaths: []string{
			"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
		},
		Devices:  configs.DefaultAutoCreatedDevices,
		Hostname: "integration",
		Mounts: []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Device:      "tmpfs",
				Source:      "shm",
				Destination: "/dev/shm",
				Data:        "mode=1777,size=65536k",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "mqueue",
				Destination: "/dev/mqueue",
				Device:      "mqueue",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		},
		Networks: []*configs.Network{
			{
				Type:    "loopback",
				Address: "127.0.0.1/0",
				Gateway: "localhost",
			},
		},
		// TestRlimit and TestExecInRlimit expect `ulimit -n` to print 1025.
		Rlimits: []configs.Rlimit{
			{
				Type: syscall.RLIMIT_NOFILE,
				Hard: uint64(1025),
				Soft: uint64(1025),
			},
		},
	}
}
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/integration/doc.go0000644000175000017500000000012312524212370023571 0ustar tianontianon
// Package integration is used for integration testing of libcontainer.
package integration
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/integration/init_test.go0000644000175000017500000000112612524212370025032 0ustar tianontianon
package integration

import (
	"log"
	"os"
	"runtime"

	"github.com/docker/libcontainer"
	_ "github.com/docker/libcontainer/nsenter"
)

// init runs the libcontainer initialization code because of the busybox style needs
// to work around the go runtime and the issues with forking.
//
// It does nothing unless the binary was invoked with "init" as its first
// argument; in that case it hands control to the factory's
// StartInitialization using file descriptor 3 as the pipe.
func init() {
	if len(os.Args) < 2 || os.Args[1] != "init" {
		return
	}
	runtime.GOMAXPROCS(1)
	runtime.LockOSThread()
	factory, err := libcontainer.New("")
	if err != nil {
		log.Fatalf("unable to initialize for container: %s", err)
	}
	if err := factory.StartInitialization(3); err != nil {
		log.Fatal(err)
	}
}
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/integration/exec_test.go0000644000175000017500000003063612524212370025023 0ustar tianontianon
package integration

import (
	"bytes"
	"io/ioutil"
	"os"
	"strconv"
	"strings"
	"testing"

	"github.com/docker/libcontainer"
	"github.com/docker/libcontainer/cgroups/systemd"
	"github.com/docker/libcontainer/configs"
)

// TestExecPS runs the ps check without a user namespace.
func TestExecPS(t *testing.T) {
	testExecPS(t, false)
}
func TestUsernsExecPS(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } testExecPS(t, true) } func testExecPS(t *testing.T, userns bool) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) if userns { config.UidMappings = []configs.IDMap{{0, 0, 1000}} config.GidMappings = []configs.IDMap{{0, 0, 1000}} config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) } buffers, exitCode, err := runContainer(config, "", "ps") if err != nil { t.Fatalf("%s: %s", buffers, err) } if exitCode != 0 { t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) } lines := strings.Split(buffers.Stdout.String(), "\n") if len(lines) < 2 { t.Fatalf("more than one process running for output %q", buffers.Stdout.String()) } expected := `1 root ps` actual := strings.Trim(lines[1], "\n ") if actual != expected { t.Fatalf("expected output %q but received %q", expected, actual) } } func TestIPCPrivate(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/ipc") if err != nil { t.Fatal(err) } config := newTemplateConfig(rootfs) buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") if err != nil { t.Fatal(err) } if exitCode != 0 { t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) } if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l { t.Fatalf("ipc link should be private to the container but equals host %q %q", actual, l) } } func TestIPCHost(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/ipc") if err != nil { t.Fatal(err) } config := newTemplateConfig(rootfs) config.Namespaces.Remove(configs.NEWIPC) buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") if err != nil { t.Fatal(err) } if exitCode != 0 { t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) } if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { t.Fatalf("ipc link not equal to host link %q %q", actual, l) } } func TestIPCJoinPath(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/ipc") if err != nil { t.Fatal(err) } config := newTemplateConfig(rootfs) config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipc") buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") if err != nil { t.Fatal(err) } if exitCode != 0 { t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) } if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { t.Fatalf("ipc link not equal to host link %q %q", actual, l) } } func TestIPCBadPath(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipcc") _, _, err = runContainer(config, "", "true") if err == nil { t.Fatal("container succeeded with bad ipc path") } } func TestRlimit(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n") if err != nil { t.Fatal(err) } if limit := strings.TrimSpace(out.Stdout.String()); limit != "1025" { t.Fatalf("expected rlimit to be 1025, got %s", limit) } } func newTestRoot() (string, error) { dir, err := ioutil.TempDir("", "libcontainer") if err != nil { return "", err } if err := os.MkdirAll(dir, 0700); err != nil { return "", err } return dir, nil } func waitProcess(p *libcontainer.Process, t *testing.T) { status, err := p.Wait() if err != nil { t.Fatal(err) } if !status.Success() { t.Fatal(status) } } func TestEnter(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) factory, err := libcontainer.New(root, libcontainer.Cgroupfs) if err != nil { t.Fatal(err) } container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } var stdout, stdout2 bytes.Buffer pconfig := libcontainer.Process{ Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}, Env: 
standardEnvironment, Stdin: stdinR, Stdout: &stdout, } err = container.Start(&pconfig) stdinR.Close() defer stdinW.Close() if err != nil { t.Fatal(err) } pid, err := pconfig.Pid() if err != nil { t.Fatal(err) } // Execute another process in the container stdinR2, stdinW2, err := os.Pipe() if err != nil { t.Fatal(err) } pconfig2 := libcontainer.Process{ Env: standardEnvironment, } pconfig2.Args = []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"} pconfig2.Stdin = stdinR2 pconfig2.Stdout = &stdout2 err = container.Start(&pconfig2) stdinR2.Close() defer stdinW2.Close() if err != nil { t.Fatal(err) } pid2, err := pconfig2.Pid() if err != nil { t.Fatal(err) } processes, err := container.Processes() if err != nil { t.Fatal(err) } n := 0 for i := range processes { if processes[i] == pid || processes[i] == pid2 { n++ } } if n != 2 { t.Fatal("unexpected number of processes", processes, pid, pid2) } // Wait processes stdinW2.Close() waitProcess(&pconfig2, t) stdinW.Close() waitProcess(&pconfig, t) // Check that both processes live in the same pidns pidns := string(stdout.Bytes()) if err != nil { t.Fatal(err) } pidns2 := string(stdout2.Bytes()) if err != nil { t.Fatal(err) } if pidns != pidns2 { t.Fatal("The second process isn't in the required pid namespace", pidns, pidns2) } } func TestProcessEnv(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) factory, err := libcontainer.New(root, libcontainer.Cgroupfs) if err != nil { t.Fatal(err) } container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } defer container.Destroy() var stdout bytes.Buffer pconfig := libcontainer.Process{ Args: []string{"sh", "-c", "env"}, Env: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "HOSTNAME=integration", "TERM=xterm", "FOO=BAR", }, Stdin: nil, 
Stdout: &stdout, } err = container.Start(&pconfig) if err != nil { t.Fatal(err) } // Wait for process waitProcess(&pconfig, t) outputEnv := string(stdout.Bytes()) if err != nil { t.Fatal(err) } // Check that the environment has the key/value pair we added if !strings.Contains(outputEnv, "FOO=BAR") { t.Fatal("Environment doesn't have the expected FOO=BAR key/value pair: ", outputEnv) } // Make sure that HOME is set if !strings.Contains(outputEnv, "HOME=/root") { t.Fatal("Environment doesn't have HOME set: ", outputEnv) } } func TestProcessCaps(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) factory, err := libcontainer.New(root, libcontainer.Cgroupfs) if err != nil { t.Fatal(err) } container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } defer container.Destroy() processCaps := append(config.Capabilities, "NET_ADMIN") var stdout bytes.Buffer pconfig := libcontainer.Process{ Args: []string{"sh", "-c", "cat /proc/self/status"}, Env: standardEnvironment, Capabilities: processCaps, Stdin: nil, Stdout: &stdout, } err = container.Start(&pconfig) if err != nil { t.Fatal(err) } // Wait for process waitProcess(&pconfig, t) outputStatus := string(stdout.Bytes()) if err != nil { t.Fatal(err) } lines := strings.Split(outputStatus, "\n") effectiveCapsLine := "" for _, l := range lines { line := strings.TrimSpace(l) if strings.Contains(line, "CapEff:") { effectiveCapsLine = line break } } if effectiveCapsLine == "" { t.Fatal("Couldn't find effective caps: ", outputStatus) } parts := strings.Split(effectiveCapsLine, ":") effectiveCapsStr := strings.TrimSpace(parts[1]) effectiveCaps, err := strconv.ParseUint(effectiveCapsStr, 16, 64) if err != nil { t.Fatal("Could not parse effective caps", err) } var netAdminMask uint64 var netAdminBit uint netAdminBit = 12 // from 
capability.h netAdminMask = 1 << netAdminBit if effectiveCaps&netAdminMask != netAdminMask { t.Fatal("CAP_NET_ADMIN is not set as expected") } } func TestFreeze(t *testing.T) { testFreeze(t, false) } func TestSystemdFreeze(t *testing.T) { if !systemd.UseSystemd() { t.Skip("Systemd is unsupported") } testFreeze(t, true) } func testFreeze(t *testing.T, systemd bool) { if testing.Short() { return } root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) if systemd { config.Cgroups.Slice = "system.slice" } factory, err := libcontainer.New(root, libcontainer.Cgroupfs) if err != nil { t.Fatal(err) } container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } defer container.Destroy() stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } pconfig := libcontainer.Process{ Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Start(&pconfig) stdinR.Close() defer stdinW.Close() if err != nil { t.Fatal(err) } pid, err := pconfig.Pid() if err != nil { t.Fatal(err) } process, err := os.FindProcess(pid) if err != nil { t.Fatal(err) } if err := container.Pause(); err != nil { t.Fatal(err) } state, err := container.Status() if err != nil { t.Fatal(err) } if err := container.Resume(); err != nil { t.Fatal(err) } if state != libcontainer.Paused { t.Fatal("Unexpected state: ", state) } stdinW.Close() s, err := process.Wait() if err != nil { t.Fatal(err) } if !s.Success() { t.Fatal(s.String()) } } func TestContainerState(t *testing.T) { if testing.Short() { return } root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) l, err := os.Readlink("/proc/1/ns/ipc") if err != nil { t.Fatal(err) } config := newTemplateConfig(rootfs) config.Namespaces = configs.Namespaces([]configs.Namespace{ 
{Type: configs.NEWNS}, {Type: configs.NEWUTS}, // host for IPC //{Type: configs.NEWIPC}, {Type: configs.NEWPID}, {Type: configs.NEWNET}, }) factory, err := libcontainer.New(root, libcontainer.Cgroupfs) if err != nil { t.Fatal(err) } container, err := factory.Create("test", config) if err != nil { t.Fatal(err) } defer container.Destroy() stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } p := &libcontainer.Process{ Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Start(p) if err != nil { t.Fatal(err) } stdinR.Close() defer p.Signal(os.Kill) st, err := container.State() if err != nil { t.Fatal(err) } l1, err := os.Readlink(st.NamespacePaths[configs.NEWIPC]) if err != nil { t.Fatal(err) } if l1 != l { t.Fatal("Container using non-host ipc namespace") } stdinW.Close() p.Wait() } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/integration/utils_test.go0000644000175000017500000000527212524212370025235 0ustar tianontianonpackage integration import ( "bytes" "fmt" "io/ioutil" "os" "os/exec" "strings" "syscall" "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" ) func newStdBuffers() *stdBuffers { return &stdBuffers{ Stdin: bytes.NewBuffer(nil), Stdout: bytes.NewBuffer(nil), Stderr: bytes.NewBuffer(nil), } } type stdBuffers struct { Stdin *bytes.Buffer Stdout *bytes.Buffer Stderr *bytes.Buffer } func (b *stdBuffers) String() string { s := []string{} if b.Stderr != nil { s = append(s, b.Stderr.String()) } if b.Stdout != nil { s = append(s, b.Stdout.String()) } return strings.Join(s, "|") } // newRootfs creates a new tmp directory and copies the busybox root filesystem func newRootfs() (string, error) { dir, err := ioutil.TempDir("", "") if err != nil { return "", err } if err := os.MkdirAll(dir, 0700); err != nil { return "", err } if err := copyBusybox(dir); err != nil { return "", nil } return dir, nil } func remove(dir string) { os.RemoveAll(dir) } // copyBusybox copies the rootfs for a busybox 
container created for the test image // into the new directory for the specific test func copyBusybox(dest string) error { out, err := exec.Command("sh", "-c", fmt.Sprintf("cp -R /busybox/* %s/", dest)).CombinedOutput() if err != nil { return fmt.Errorf("copy error %q: %q", err, out) } return nil } func newContainer(config *configs.Config) (libcontainer.Container, error) { cgm := libcontainer.Cgroupfs if config.Cgroups != nil && config.Cgroups.Slice == "system.slice" { cgm = libcontainer.SystemdCgroups } factory, err := libcontainer.New(".", libcontainer.InitArgs(os.Args[0], "init", "--"), cgm, ) if err != nil { return nil, err } return factory.Create("testCT", config) } // runContainer runs the container with the specific config and arguments // // buffers are returned containing the STDOUT and STDERR output for the run // along with the exit code and any go error func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) { container, err := newContainer(config) if err != nil { return nil, -1, err } defer container.Destroy() buffers = newStdBuffers() process := &libcontainer.Process{ Args: args, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Start(process) if err != nil { return nil, -1, err } ps, err := process.Wait() if err != nil { return nil, -1, err } status := ps.Sys().(syscall.WaitStatus) if status.Exited() { exitCode = status.ExitStatus() } else if status.Signaled() { exitCode = -int(status.Signal()) } else { return nil, -1, err } return } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/integration/execin_test.go0000644000175000017500000001441412524212370025346 0ustar tianontianonpackage integration import ( "bytes" "io" "os" "strings" "testing" "time" "github.com/docker/libcontainer" ) func TestExecIn(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer 
remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } process := &libcontainer.Process{ Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Start(process) stdinR.Close() defer stdinW.Close() if err != nil { t.Fatal(err) } buffers := newStdBuffers() ps := &libcontainer.Process{ Args: []string{"ps"}, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Start(ps) if err != nil { t.Fatal(err) } if _, err := ps.Wait(); err != nil { t.Fatal(err) } stdinW.Close() if _, err := process.Wait(); err != nil { t.Log(err) } out := buffers.Stdout.String() if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") { t.Fatalf("unexpected running process, output %q", out) } } func TestExecInRlimit(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } process := &libcontainer.Process{ Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Start(process) stdinR.Close() defer stdinW.Close() if err != nil { t.Fatal(err) } buffers := newStdBuffers() ps := &libcontainer.Process{ Args: []string{"/bin/sh", "-c", "ulimit -n"}, Env: standardEnvironment, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Start(ps) if err != nil { t.Fatal(err) } if _, err := ps.Wait(); err != nil { t.Fatal(err) } stdinW.Close() if _, err := process.Wait(); err != nil { t.Log(err) } out := buffers.Stdout.String() if limit := strings.TrimSpace(out); limit != "1025" { t.Fatalf("expected rlimit to 
be 1025, got %s", limit) } } func TestExecInError(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } process := &libcontainer.Process{ Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Start(process) stdinR.Close() defer func() { stdinW.Close() if _, err := process.Wait(); err != nil { t.Log(err) } }() if err != nil { t.Fatal(err) } unexistent := &libcontainer.Process{ Args: []string{"unexistent"}, Env: standardEnvironment, } err = container.Start(unexistent) if err == nil { t.Fatal("Should be an error") } if !strings.Contains(err.Error(), "executable file not found") { t.Fatalf("Should be error about not found executable, got %s", err) } } func TestExecInTTY(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } process := &libcontainer.Process{ Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Start(process) stdinR.Close() defer stdinW.Close() if err != nil { t.Fatal(err) } var stdout bytes.Buffer ps := &libcontainer.Process{ Args: []string{"ps"}, Env: standardEnvironment, } console, err := ps.NewConsole(0) copy := make(chan struct{}) go func() { io.Copy(&stdout, console) close(copy) }() if err != nil { t.Fatal(err) } err = container.Start(ps) if err != nil { t.Fatal(err) } select { case <-time.After(5 * time.Second): t.Fatal("Waiting for copy timed out") case <-copy: } if _, err := 
ps.Wait(); err != nil { t.Fatal(err) } stdinW.Close() if _, err := process.Wait(); err != nil { t.Log(err) } out := stdout.String() if !strings.Contains(out, "cat") || !strings.Contains(string(out), "ps") { t.Fatalf("unexpected running process, output %q", out) } } func TestExecInEnvironment(t *testing.T) { if testing.Short() { return } rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) config := newTemplateConfig(rootfs) container, err := newContainer(config) if err != nil { t.Fatal(err) } defer container.Destroy() // Execute a first process in the container stdinR, stdinW, err := os.Pipe() if err != nil { t.Fatal(err) } process := &libcontainer.Process{ Args: []string{"cat"}, Env: standardEnvironment, Stdin: stdinR, } err = container.Start(process) stdinR.Close() defer stdinW.Close() if err != nil { t.Fatal(err) } buffers := newStdBuffers() process2 := &libcontainer.Process{ Args: []string{"env"}, Env: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "DEBUG=true", "DEBUG=false", "ENV=test", }, Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, } err = container.Start(process2) if err != nil { t.Fatal(err) } if _, err := process2.Wait(); err != nil { out := buffers.Stdout.String() t.Fatal(err, out) } stdinW.Close() if _, err := process.Wait(); err != nil { t.Log(err) } out := buffers.Stdout.String() // check execin's process environment if !strings.Contains(out, "DEBUG=false") || !strings.Contains(out, "ENV=test") || !strings.Contains(out, "HOME=/root") || !strings.Contains(out, "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin") || strings.Contains(out, "DEBUG=true") { t.Fatalf("unexpected running process, output %q", out) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/process_linux.go0000644000175000017500000001351112524212370023403 0ustar tianontianon// +build linux package libcontainer import ( "encoding/json" "errors" "io" "os" "os/exec" "syscall" 
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/system" ) type parentProcess interface { // pid returns the pid for the running process. pid() int // start starts the process execution. start() error // send a SIGKILL to the process and wait for the exit. terminate() error // wait waits on the process returning the process state. wait() (*os.ProcessState, error) // startTime return's the process start time. startTime() (string, error) signal(os.Signal) error } type setnsProcess struct { cmd *exec.Cmd parentPipe *os.File childPipe *os.File cgroupPaths map[string]string config *initConfig } func (p *setnsProcess) startTime() (string, error) { return system.GetProcessStartTime(p.pid()) } func (p *setnsProcess) signal(sig os.Signal) error { s, ok := sig.(syscall.Signal) if !ok { return errors.New("os: unsupported signal type") } return syscall.Kill(p.cmd.Process.Pid, s) } func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() if err = p.execSetns(); err != nil { return newSystemError(err) } if len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.cmd.Process.Pid); err != nil { return newSystemError(err) } } if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil { return newSystemError(err) } if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemError(err) } // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *genericError if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemError(err) } if ierr != nil { return newSystemError(ierr) } return nil } // execSetns runs the process that executes C code to perform the setns calls // because setns support requires the C process to fork off a child and perform the setns // before the go runtime boots, we wait on the process to die and receive the child's pid // over the provided pipe. 
func (p *setnsProcess) execSetns() error { err := p.cmd.Start() p.childPipe.Close() if err != nil { return newSystemError(err) } status, err := p.cmd.Process.Wait() if err != nil { p.cmd.Wait() return newSystemError(err) } if !status.Success() { p.cmd.Wait() return newSystemError(&exec.ExitError{ProcessState: status}) } var pid *pid if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { p.cmd.Wait() return newSystemError(err) } process, err := os.FindProcess(pid.Pid) if err != nil { return err } p.cmd.Process = process return nil } // terminate sends a SIGKILL to the forked process for the setns routine then waits to // avoid the process becomming a zombie. func (p *setnsProcess) terminate() error { if p.cmd.Process == nil { return nil } err := p.cmd.Process.Kill() if _, werr := p.wait(); err == nil { err = werr } return err } func (p *setnsProcess) wait() (*os.ProcessState, error) { err := p.cmd.Wait() if err != nil { return p.cmd.ProcessState, err } return p.cmd.ProcessState, nil } func (p *setnsProcess) pid() int { return p.cmd.Process.Pid } type initProcess struct { cmd *exec.Cmd parentPipe *os.File childPipe *os.File config *initConfig manager cgroups.Manager } func (p *initProcess) pid() int { return p.cmd.Process.Pid } func (p *initProcess) start() error { defer p.parentPipe.Close() err := p.cmd.Start() p.childPipe.Close() if err != nil { return newSystemError(err) } // Do this before syncing with child so that no children // can escape the cgroup if err := p.manager.Apply(p.pid()); err != nil { return newSystemError(err) } defer func() { if err != nil { // TODO: should not be the responsibility to call here p.manager.Destroy() } }() if err := p.createNetworkInterfaces(); err != nil { return newSystemError(err) } if err := p.sendConfig(); err != nil { return newSystemError(err) } // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *genericError if err := 
json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemError(err) } if ierr != nil { return newSystemError(ierr) } return nil } func (p *initProcess) wait() (*os.ProcessState, error) { err := p.cmd.Wait() if err != nil { return p.cmd.ProcessState, err } // we should kill all processes in cgroup when init is died if we use host PID namespace if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 { killCgroupProcesses(p.manager) } return p.cmd.ProcessState, nil } func (p *initProcess) terminate() error { if p.cmd.Process == nil { return nil } err := p.cmd.Process.Kill() if _, werr := p.wait(); err == nil { err = werr } return err } func (p *initProcess) startTime() (string, error) { return system.GetProcessStartTime(p.pid()) } func (p *initProcess) sendConfig() error { // send the state to the container's init process then shutdown writes for the parent if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil { return err } // shutdown writes for the parent side of the pipe return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR) } func (p *initProcess) createNetworkInterfaces() error { for _, config := range p.config.Config.Networks { strategy, err := getStrategy(config.Type) if err != nil { return err } n := &network{ Network: *config, } if err := strategy.create(n, p.pid()); err != nil { return err } p.config.Networks = append(p.config.Networks, n) } return nil } func (p *initProcess) signal(sig os.Signal) error { s, ok := sig.(syscall.Signal) if !ok { return errors.New("os: unsupported signal type") } return syscall.Kill(p.cmd.Process.Pid, s) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/factory_linux.go0000644000175000017500000001752512524212370023405 0ustar tianontianon// +build linux package libcontainer import ( "encoding/json" "fmt" "io/ioutil" "os" "os/exec" "path/filepath" "regexp" "syscall" "github.com/docker/docker/pkg/mount" "github.com/docker/libcontainer/cgroups" 
"github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/configs/validate" ) const ( stateFilename = "state.json" ) var ( idRegex = regexp.MustCompile(`^[\w_]+$`) maxIdLen = 1024 ) // InitArgs returns an options func to configure a LinuxFactory with the // provided init arguments. func InitArgs(args ...string) func(*LinuxFactory) error { return func(l *LinuxFactory) error { name := args[0] if filepath.Base(name) == name { if lp, err := exec.LookPath(name); err == nil { name = lp } } l.InitPath = name l.InitArgs = append([]string{name}, args[1:]...) return nil } } // InitPath returns an options func to configure a LinuxFactory with the // provided absolute path to the init binary and arguements. func InitPath(path string, args ...string) func(*LinuxFactory) error { return func(l *LinuxFactory) error { l.InitPath = path l.InitArgs = args return nil } } // SystemdCgroups is an options func to configure a LinuxFactory to return // containers that use systemd to create and manage cgroups. func SystemdCgroups(l *LinuxFactory) error { l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { return &systemd.Manager{ Cgroups: config, Paths: paths, } } return nil } // Cgroupfs is an options func to configure a LinuxFactory to return // containers that use the native cgroups filesystem implementation to // create and manage cgroups. func Cgroupfs(l *LinuxFactory) error { l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { return &fs.Manager{ Cgroups: config, Paths: paths, } } return nil } // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. 
func TmpfsRoot(l *LinuxFactory) error { mounted, err := mount.Mounted(l.Root) if err != nil { return err } if !mounted { if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil { return err } } return nil } // New returns a linux based container factory based in the root directory and // configures the factory with the provided option funcs. func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { if root != "" { if err := os.MkdirAll(root, 0700); err != nil { return nil, newGenericError(err, SystemError) } } l := &LinuxFactory{ Root: root, Validator: validate.New(), } InitArgs(os.Args[0], "init")(l) Cgroupfs(l) for _, opt := range options { if err := opt(l); err != nil { return nil, err } } return l, nil } // LinuxFactory implements the default factory interface for linux based systems. type LinuxFactory struct { // Root directory for the factory to store state. Root string // InitPath is the absolute path to the init binary. InitPath string // InitArgs are arguments for calling the init responsibilities for spawning // a container. InitArgs []string // Validator provides validation to container configurations. Validator validate.Validator // NewCgroupsManager returns an initialized cgroups manager for a single container. 
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager } func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { if l.Root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } if err := l.validateID(id); err != nil { return nil, err } if err := l.Validator.Validate(config); err != nil { return nil, newGenericError(err, ConfigInvalid) } containerRoot := filepath.Join(l.Root, id) if _, err := os.Stat(containerRoot); err == nil { return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse) } else if !os.IsNotExist(err) { return nil, newGenericError(err, SystemError) } if err := os.MkdirAll(containerRoot, 0700); err != nil { return nil, newGenericError(err, SystemError) } return &linuxContainer{ id: id, root: containerRoot, config: config, initPath: l.InitPath, initArgs: l.InitArgs, cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), }, nil } func (l *LinuxFactory) Load(id string) (Container, error) { if l.Root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } containerRoot := filepath.Join(l.Root, id) state, err := l.loadState(containerRoot) if err != nil { return nil, err } r := &restoredProcess{ processPid: state.InitProcessPid, processStartTime: state.InitProcessStartTime, } return &linuxContainer{ initProcess: r, id: id, config: &state.Config, initPath: l.InitPath, initArgs: l.InitArgs, cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), root: containerRoot, }, nil } func (l *LinuxFactory) Type() string { return "libcontainer" } // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally func (l *LinuxFactory) StartInitialization(pipefd uintptr) (err error) { var ( pipe = os.NewFile(uintptr(pipefd), "pipe") it = 
initType(os.Getenv("_LIBCONTAINER_INITTYPE")) ) // clear the current process's environment to clean any libcontainer // specific env vars. os.Clearenv() defer func() { // if we have an error during the initialization of the container's init then send it back to the // parent process in the form of an initError. if err != nil { // ensure that any data sent from the parent is consumed so it doesn't // receive ECONNRESET when the child writes to the pipe. ioutil.ReadAll(pipe) if err := json.NewEncoder(pipe).Encode(newSystemError(err)); err != nil { panic(err) } } // ensure that this pipe is always closed pipe.Close() }() i, err := newContainerInit(it, pipe) if err != nil { return err } return i.Init() } func (l *LinuxFactory) loadState(root string) (*State, error) { f, err := os.Open(filepath.Join(root, stateFilename)) if err != nil { if os.IsNotExist(err) { return nil, newGenericError(err, ContainerNotExists) } return nil, newGenericError(err, SystemError) } defer f.Close() var state *State if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, newGenericError(err, SystemError) } return state, nil } func (l *LinuxFactory) validateID(id string) error { if !idRegex.MatchString(id) { return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) } if len(id) > maxIdLen { return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) } return nil } // restoredProcess represents a process where the calling process may or may not be // the parent process. This process is created when a factory loads a container from // a persisted state. 
type restoredProcess struct { processPid int processStartTime string } func (p *restoredProcess) start() error { return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) } func (p *restoredProcess) pid() int { return p.processPid } func (p *restoredProcess) terminate() error { return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError) } func (p *restoredProcess) wait() (*os.ProcessState, error) { return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) } func (p *restoredProcess) startTime() (string, error) { return p.processStartTime, nil } func (p *restoredProcess) signal(s os.Signal) error { return newGenericError(fmt.Errorf("restored process cannot be signaled"), SystemError) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/PRINCIPLES.md0000644000175000017500000000216212524212370022151 0ustar tianontianon# libcontainer Principles In the design and development of libcontainer we try to follow these principles: (Work in progress) * Don't try to replace every tool. Instead, be an ingredient to improve them. * Less code is better. * Fewer components are better. Do you really need to add one more class? * 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand. * Don't do later what you can do now. "//TODO: refactor" is not acceptable in new code. * When hesitating between two options, choose the one that is easier to reverse. * "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later. * Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable. * The fewer moving parts in a container, the better. * Don't merge it unless you document it. * Don't document it unless you can keep it up-to-date. * Don't merge it unless you test it! * Everyone's problem is slightly different. 
Focus on the part that is the same for everyone, and solve that. libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/container_nouserns_linux.go0000644000175000017500000000037212524212370025644 0ustar tianontianon// +build !go1.4 package libcontainer import ( "fmt" "syscall" ) // not available before go 1.4 func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error { return fmt.Errorf("User namespace is not supported in golang < 1.4") } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/0000755000175000017500000000000012524212370021606 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/device.go0000644000175000017500000000205512524212370023376 0ustar tianontianonpackage configs import ( "fmt" "os" ) const ( Wildcard = -1 ) type Device struct { // Device type, block, char, etc. Type rune `json:"type"` // Path to the device. Path string `json:"path"` // Major is the device's major number. Major int64 `json:"major"` // Minor is the device's minor number. Minor int64 `json:"minor"` // Cgroup permissions format, rwm. Permissions string `json:"permissions"` // FileMode permission bits for the device. FileMode os.FileMode `json:"file_mode"` // Uid of the device. Uid uint32 `json:"uid"` // Gid of the device. Gid uint32 `json:"gid"` } func (d *Device) CgroupString() string { return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) } func (d *Device) Mkdev() int { return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) } // deviceNumberString converts the device number to a string return result. func deviceNumberString(number int64) string { if number == Wildcard { return "*" } return fmt.Sprint(number) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/mount.go0000644000175000017500000000073512524212370023304 0ustar tianontianonpackage configs type Mount struct { // Source path for the mount. 
Source string `json:"source"` // Destination path for the mount inside the container. Destination string `json:"destination"` // Device the mount is for. Device string `json:"device"` // Mount flags. Flags int `json:"flags"` // Mount data applied to the mount. Data string `json:"data"` // Relabel source if set, "z" indicates shared, "Z" indicates unshared. Relabel string `json:"relabel"` } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/validate/0000755000175000017500000000000012524212370023377 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/validate/config.go0000644000175000017500000000455712524212370025206 0ustar tianontianonpackage validate import ( "fmt" "os" "path/filepath" "github.com/docker/libcontainer/configs" ) type Validator interface { Validate(*configs.Config) error } func New() Validator { return &ConfigValidator{} } type ConfigValidator struct { } func (v *ConfigValidator) Validate(config *configs.Config) error { if err := v.rootfs(config); err != nil { return err } if err := v.network(config); err != nil { return err } if err := v.hostname(config); err != nil { return err } if err := v.security(config); err != nil { return err } if err := v.usernamespace(config); err != nil { return err } return nil } // rootfs validates the the rootfs is an absolute path and is not a symlink // to the container's root filesystem. 
func (v *ConfigValidator) rootfs(config *configs.Config) error { cleaned, err := filepath.Abs(config.Rootfs) if err != nil { return err } if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { return err } if config.Rootfs != cleaned { return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) } return nil } func (v *ConfigValidator) network(config *configs.Config) error { if !config.Namespaces.Contains(configs.NEWNET) { if len(config.Networks) > 0 || len(config.Routes) > 0 { return fmt.Errorf("unable to apply network settings without a private NET namespace") } } return nil } func (v *ConfigValidator) hostname(config *configs.Config) error { if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { return fmt.Errorf("unable to set hostname without a private UTS namespace") } return nil } func (v *ConfigValidator) security(config *configs.Config) error { // restrict sys without mount namespace if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && !config.Namespaces.Contains(configs.NEWNS) { return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") } return nil } func (v *ConfigValidator) usernamespace(config *configs.Config) error { if config.Namespaces.Contains(configs.NEWUSER) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { return fmt.Errorf("USER namespaces aren't enabled in the kernel") } } else { if config.UidMappings != nil || config.GidMappings != nil { return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config") } } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/namespaces.go0000644000175000017500000000431212524212370024254 0ustar tianontianonpackage configs import ( "fmt" "syscall" ) type NamespaceType string const ( NEWNET NamespaceType = "NEWNET" NEWPID NamespaceType = "NEWPID" NEWNS NamespaceType = "NEWNS" NEWUTS NamespaceType = "NEWUTS" NEWIPC NamespaceType = "NEWIPC" NEWUSER 
NamespaceType = "NEWUSER" ) func NamespaceTypes() []NamespaceType { return []NamespaceType{ NEWNET, NEWPID, NEWNS, NEWUTS, NEWIPC, NEWUSER, } } // Namespace defines configuration for each namespace. It specifies an // alternate path that is able to be joined via setns. type Namespace struct { Type NamespaceType `json:"type"` Path string `json:"path"` } func (n *Namespace) Syscall() int { return namespaceInfo[n.Type] } func (n *Namespace) GetPath(pid int) string { if n.Path != "" { return n.Path } return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file()) } func (n *Namespace) file() string { file := "" switch n.Type { case NEWNET: file = "net" case NEWNS: file = "mnt" case NEWPID: file = "pid" case NEWIPC: file = "ipc" case NEWUSER: file = "user" case NEWUTS: file = "uts" } return file } type Namespaces []Namespace func (n *Namespaces) Remove(t NamespaceType) bool { i := n.index(t) if i == -1 { return false } *n = append((*n)[:i], (*n)[i+1:]...) return true } func (n *Namespaces) Add(t NamespaceType, path string) { i := n.index(t) if i == -1 { *n = append(*n, Namespace{Type: t, Path: path}) return } (*n)[i].Path = path } func (n *Namespaces) index(t NamespaceType) int { for i, ns := range *n { if ns.Type == t { return i } } return -1 } func (n *Namespaces) Contains(t NamespaceType) bool { return n.index(t) != -1 } var namespaceInfo = map[NamespaceType]int{ NEWNET: syscall.CLONE_NEWNET, NEWNS: syscall.CLONE_NEWNS, NEWUSER: syscall.CLONE_NEWUSER, NEWIPC: syscall.CLONE_NEWIPC, NEWUTS: syscall.CLONE_NEWUTS, NEWPID: syscall.CLONE_NEWPID, } // CloneFlags parses the container's Namespaces options to set the correct // flags on clone, unshare. This functions returns flags only for new namespaces. 
func (n *Namespaces) CloneFlags() uintptr { var flag int for _, v := range *n { if v.Path != "" { continue } flag |= namespaceInfo[v.Type] } return uintptr(flag) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/device_defaults.go0000644000175000017500000000503212524212370025263 0ustar tianontianonpackage configs var ( // These are devices that are to be both allowed and created. DefaultSimpleDevices = []*Device{ // /dev/null and zero { Path: "/dev/null", Type: 'c', Major: 1, Minor: 3, Permissions: "rwm", FileMode: 0666, }, { Path: "/dev/zero", Type: 'c', Major: 1, Minor: 5, Permissions: "rwm", FileMode: 0666, }, { Path: "/dev/full", Type: 'c', Major: 1, Minor: 7, Permissions: "rwm", FileMode: 0666, }, // consoles and ttys { Path: "/dev/tty", Type: 'c', Major: 5, Minor: 0, Permissions: "rwm", FileMode: 0666, }, // /dev/urandom,/dev/random { Path: "/dev/urandom", Type: 'c', Major: 1, Minor: 9, Permissions: "rwm", FileMode: 0666, }, { Path: "/dev/random", Type: 'c', Major: 1, Minor: 8, Permissions: "rwm", FileMode: 0666, }, } DefaultAllowedDevices = append([]*Device{ // allow mknod for any device { Type: 'c', Major: Wildcard, Minor: Wildcard, Permissions: "m", }, { Type: 'b', Major: Wildcard, Minor: Wildcard, Permissions: "m", }, { Path: "/dev/console", Type: 'c', Major: 5, Minor: 1, Permissions: "rwm", }, { Path: "/dev/tty0", Type: 'c', Major: 4, Minor: 0, Permissions: "rwm", }, { Path: "/dev/tty1", Type: 'c', Major: 4, Minor: 1, Permissions: "rwm", }, // /dev/pts/ - pts namespaces are "coming soon" { Path: "", Type: 'c', Major: 136, Minor: Wildcard, Permissions: "rwm", }, { Path: "", Type: 'c', Major: 5, Minor: 2, Permissions: "rwm", }, // tuntap { Path: "", Type: 'c', Major: 10, Minor: 200, Permissions: "rwm", }, }, DefaultSimpleDevices...) DefaultAutoCreatedDevices = append([]*Device{ { // /dev/fuse is created but not allowed. // This is to allow java to work. 
// Rlimit is one resource limit entry of a container configuration.
type Rlimit struct {
	// Type selects which resource the limit applies to.
	// NOTE(review): presumably an RLIMIT_* resource number — confirm
	// against the code that applies the limits.
	Type int `json:"type"`
	// Hard is the hard limit value.
	Hard uint64 `json:"hard"`
	// Soft is the soft limit value.
	Soft uint64 `json:"soft"`
}
Readonlyfs bool `json:"readonlyfs"` // Mounts specify additional source and destination paths that will be mounted inside the container's // rootfs and mount namespace if specified Mounts []*Mount `json:"mounts"` // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! Devices []*Device `json:"devices"` MountLabel string `json:"mount_label"` // Hostname optionally sets the container's hostname if provided Hostname string `json:"hostname"` // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process Namespaces Namespaces `json:"namespaces"` // Capabilities specify the capabilities to keep when executing the process inside the container // All capbilities not specified will be dropped from the processes capability mask Capabilities []string `json:"capabilities"` // Networks specifies the container's network setup to be created Networks []*Network `json:"networks"` // Routes can be specified to create entries in the route table as the container is started Routes []*Route `json:"routes"` // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available Cgroups *Cgroup `json:"cgroups"` // AppArmorProfile specifies the profile to apply to the process running in the container and is // change at the time the process is execed AppArmorProfile string `json:"apparmor_profile"` // ProcessLabel specifies the label to apply to the process running in the container. 
It is // commonly used by selinux ProcessLabel string `json:"process_label"` // Rlimits specifies the resource limits, such as max open files, to set in the container // If Rlimits are not set, the container will inherit rlimits from the parent process Rlimits []Rlimit `json:"rlimits"` // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. AdditionalGroups []int `json:"additional_groups"` // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings"` // GidMappings is an array of Group ID mappings for User Namespaces GidMappings []IDMap `json:"gid_mappings"` // MaskPaths specifies paths within the container's rootfs to mask over with a bind // mount pointing to /dev/null as to prevent reads of the file. MaskPaths []string `json:"mask_paths"` // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only // so that these files prevent any writes. ReadonlyPaths []string `json:"readonly_paths"` } // Gets the root uid for the process on host which could be non-zero // when user namespaces are enabled. func (c Config) HostUID() (int, error) { if c.Namespaces.Contains(NEWUSER) { if c.UidMappings == nil { return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") } id, found := c.hostIDFromMapping(0, c.UidMappings) if !found { return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") } return id, nil } // Return default root uid 0 return 0, nil } // Gets the root uid for the process on host which could be non-zero // when user namespaces are enabled. 
func (c Config) HostGID() (int, error) { if c.Namespaces.Contains(NEWUSER) { if c.GidMappings == nil { return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") } id, found := c.hostIDFromMapping(0, c.GidMappings) if !found { return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") } return id, nil } // Return default root uid 0 return 0, nil } // Utility function that gets a host ID for a container ID from user namespace map // if that ID is present in the map. func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { for _, m := range uMap { if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { hostID := m.HostID + (containerID - m.ContainerID) return hostID, true } } return -1, false } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/config_test.go0000644000175000017500000001216312524212370024444 0ustar tianontianonpackage configs import ( "encoding/json" "fmt" "os" "path/filepath" "testing" ) // Checks whether the expected capability is specified in the capabilities. 
func contains(expected string, values []string) bool {
	// Linear scan; reports true on the first exact match.
	for i := 0; i < len(values); i++ {
		if values[i] == expected {
			return true
		}
	}
	return false
}
SYS_CHROOT") t.Fail() } for _, n := range container.Networks { if n.Type == "veth" { if n.Bridge != "docker0" { t.Logf("veth bridge should be docker0 but received %q", n.Bridge) t.Fail() } if n.Address != "172.17.0.101/16" { t.Logf("veth address should be 172.17.0.101/61 but received %q", n.Address) t.Fail() } if n.Gateway != "172.17.42.1" { t.Logf("veth gateway should be 172.17.42.1 but received %q", n.Gateway) t.Fail() } if n.Mtu != 1500 { t.Logf("veth mtu should be 1500 but received %d", n.Mtu) t.Fail() } break } } for _, d := range DefaultSimpleDevices { if !containsDevice(d, container.Devices) { t.Logf("expected device configuration for %s", d.Path) t.Fail() } } } func TestApparmorProfile(t *testing.T) { container, err := loadConfig("apparmor.json") if err != nil { t.Fatal(err) } if container.AppArmorProfile != "docker-default" { t.Fatalf("expected apparmor profile to be docker-default but received %q", container.AppArmorProfile) } } func TestSelinuxLabels(t *testing.T) { container, err := loadConfig("selinux.json") if err != nil { t.Fatal(err) } label := "system_u:system_r:svirt_lxc_net_t:s0:c164,c475" if container.ProcessLabel != label { t.Fatalf("expected process label %q but received %q", label, container.ProcessLabel) } if container.MountLabel != label { t.Fatalf("expected mount label %q but received %q", label, container.MountLabel) } } func TestRemoveNamespace(t *testing.T) { ns := Namespaces{ {Type: NEWNET}, } if !ns.Remove(NEWNET) { t.Fatal("NEWNET was not removed") } if len(ns) != 0 { t.Fatalf("namespaces should have 0 items but reports %d", len(ns)) } } func TestHostUIDNoUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{}, } uid, err := config.HostUID() if err != nil { t.Fatal(err) } if uid != 0 { t.Fatalf("expected uid 0 with no USERNS but received %d", uid) } } func TestHostUIDWithUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{{Type: NEWUSER}}, UidMappings: []IDMap{ { ContainerID: 0, HostID: 1000, Size: 1, }, }, } 
uid, err := config.HostUID() if err != nil { t.Fatal(err) } if uid != 1000 { t.Fatalf("expected uid 1000 with no USERNS but received %d", uid) } } func TestHostGIDNoUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{}, } uid, err := config.HostGID() if err != nil { t.Fatal(err) } if uid != 0 { t.Fatalf("expected gid 0 with no USERNS but received %d", uid) } } func TestHostGIDWithUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{{Type: NEWUSER}}, GidMappings: []IDMap{ { ContainerID: 0, HostID: 1000, Size: 1, }, }, } uid, err := config.HostGID() if err != nil { t.Fatal(err) } if uid != 1000 { t.Fatalf("expected gid 1000 with no USERNS but received %d", uid) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/configs/network.go0000644000175000017500000000551412524212370023633 0ustar tianontianonpackage configs // Network defines configuration for a container's networking stack // // The network configuration can be omited from a container causing the // container to be setup with the host's networking stack type Network struct { // Type sets the networks type, commonly veth and loopback Type string `json:"type"` // Name of the network interface Name string `json:"name"` // The bridge to use. 
Bridge string `json:"bridge"` // MacAddress contains the MAC address to set on the network interface MacAddress string `json:"mac_address"` // Address contains the IPv4 and mask to set on the network interface Address string `json:"address"` // Gateway sets the gateway address that is used as the default for the interface Gateway string `json:"gateway"` // IPv6Address contains the IPv6 and mask to set on the network interface IPv6Address string `json:"ipv6_address"` // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface IPv6Gateway string `json:"ipv6_gateway"` // Mtu sets the mtu value for the interface and will be mirrored on both the host and // container's interfaces if a pair is created, specifically in the case of type veth // Note: This does not apply to loopback interfaces. Mtu int `json:"mtu"` // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and // container's interfaces if a pair is created, specifically in the case of type veth // Note: This does not apply to loopback interfaces. TxQueueLen int `json:"txqueuelen"` // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the // container. HostInterfaceName string `json:"host_interface_name"` // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface // bridge port in the case of type veth // Note: This is unsupported on some systems. // Note: This does not apply to loopback interfaces. HairpinMode bool `json:"hairpin_mode"` } // Routes can be specified to create entries in the route table as the container is started // // All of destination, source, and gateway should be either IPv4 or IPv6. // One of the three options must be present, and ommitted entries will use their // IP family default for the route table. 
// FreezerState is the type of the Cgroup.Freezer setting.
type FreezerState string

const (
	// Undefined is the empty value, i.e. no freezer state was specified.
	Undefined FreezerState = ""
	// Frozen is the "FROZEN" freezer value.
	Frozen FreezerState = "FROZEN"
	// Thawed is the "THAWED" freezer value.
	Thawed FreezerState = "THAWED"
)
// Minor extracts the minor number from a combined device number: the low
// 8 bits are taken as-is and the bits selected by 0xfff00 after a 12-bit
// right shift are placed above them.
func Minor(devNumber int) int64 {
	lowBits := devNumber & 0xff
	highBits := (devNumber >> 12) & 0xfff00
	return int64(lowBits | highBits)
}
func DeviceFromPath(path, permissions string) (*configs.Device, error) { fileInfo, err := osLstat(path) if err != nil { return nil, err } var ( devType rune mode = fileInfo.Mode() fileModePermissionBits = os.FileMode.Perm(mode) ) switch { case mode&os.ModeDevice == 0: return nil, ErrNotADevice case mode&os.ModeCharDevice != 0: fileModePermissionBits |= syscall.S_IFCHR devType = 'c' default: fileModePermissionBits |= syscall.S_IFBLK devType = 'b' } stat_t, ok := fileInfo.Sys().(*syscall.Stat_t) if !ok { return nil, fmt.Errorf("cannot determine the device number for device %s", path) } devNumber := int(stat_t.Rdev) return &configs.Device{ Type: devType, Path: path, Major: Major(devNumber), Minor: Minor(devNumber), Permissions: permissions, FileMode: fileModePermissionBits, Uid: stat_t.Uid, Gid: stat_t.Gid, }, nil } func HostDevices() ([]*configs.Device, error) { return getDevices("/dev") } func getDevices(path string) ([]*configs.Device, error) { files, err := ioutilReadDir(path) if err != nil { return nil, err } out := []*configs.Device{} for _, f := range files { switch { case f.IsDir(): switch f.Name() { case "pts", "shm", "fd", "mqueue": continue default: sub, err := getDevices(filepath.Join(path, f.Name())) if err != nil { return nil, err } out = append(out, sub...) continue } case f.Name() == "console": continue } device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") if err != nil { if err == ErrNotADevice { continue } return nil, err } out = append(out, device) } return out, nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/devices/devices_test.go0000644000175000017500000000245312524212370024614 0ustar tianontianonpackage devices import ( "errors" "os" "testing" ) func TestDeviceFromPathLstatFailure(t *testing.T) { testError := errors.New("test error") // Override os.Lstat to inject error. 
osLstat = func(path string) (os.FileInfo, error) { return nil, testError } _, err := DeviceFromPath("", "") if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } func TestHostDevicesIoutilReadDirFailure(t *testing.T) { testError := errors.New("test error") // Override ioutil.ReadDir to inject error. ioutilReadDir = func(dirname string) ([]os.FileInfo, error) { return nil, testError } _, err := HostDevices() if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } func TestHostDevicesIoutilReadDirDeepFailure(t *testing.T) { testError := errors.New("test error") called := false // Override ioutil.ReadDir to inject error after the first call. ioutilReadDir = func(dirname string) ([]os.FileInfo, error) { if called { return nil, testError } called = true // Provoke a second call. fi, err := os.Lstat("/tmp") if err != nil { t.Fatalf("Unexpected error %v", err) } return []os.FileInfo{fi}, nil } _, err := HostDevices() if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/0000755000175000017500000000000012524212370021640 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/stats.go0000644000175000017500000000531612524212370023332 0ustar tianontianonpackage cgroups type ThrottlingData struct { // Number of periods with throttling active Periods uint64 `json:"periods,omitempty"` // Number of periods when the container hit its throttling limit. ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` // Aggregate time the container was throttled for in nanoseconds. ThrottledTime uint64 `json:"throttled_time,omitempty"` } // All CPU stats are aggregate since container inception. type CpuUsage struct { // Total CPU time consumed. // Units: nanoseconds. TotalUsage uint64 `json:"total_usage,omitempty"` // Total CPU time consumed per core. // Units: nanoseconds. 
// BlkioStats holds the block I/O statistics gathered for a cgroup. Every
// field is a list of BlkioStatEntry values and is left out of the JSON
// encoding when empty.
//
// NOTE: IoQueuedRecursive is encoded under the key "io_queue_recursive"
// (without the "d"), which differs from the pattern of the other keys.
type BlkioStats struct {
	// number of bytes transferred to and from the block device
	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
	IoServicedRecursive     []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
	IoQueuedRecursive       []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
	IoServiceTimeRecursive  []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
	IoWaitTimeRecursive     []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
	IoMergedRecursive       []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
	IoTimeRecursive         []BlkioStatEntry `json:"io_time_recursive,omitempty"`
	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"`
}
&Stats{MemoryStats: memoryStats} } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/cgroups_test.go0000644000175000017500000000063712524212370024716 0ustar tianontianonpackage cgroups import ( "bytes" "testing" ) const ( cgroupsContents = `11:hugetlb:/ 10:perf_event:/ 9:blkio:/ 8:net_cls:/ 7:freezer:/ 6:devices:/ 5:memory:/ 4:cpuacct,cpu:/ 3:cpuset:/ 2:name=systemd:/user.slice/user-1000.slice/session-16.scope` ) func TestParseCgroups(t *testing.T) { r := bytes.NewBuffer([]byte(cgroupsContents)) _, err := ParseCgroupFile("blkio", r) if err != nil { t.Fatal(err) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/systemd/0000755000175000017500000000000012524212370023330 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/systemd/apply_systemd.go0000644000175000017500000002426112524212370026561 0ustar tianontianon// +build linux package systemd import ( "fmt" "io/ioutil" "os" "path/filepath" "strconv" "strings" "sync" "time" systemd "github.com/coreos/go-systemd/dbus" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/configs" "github.com/godbus/dbus" ) type Manager struct { Cgroups *configs.Cgroup Paths map[string]string } type subsystem interface { // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Set the cgroup represented by cgroup. 
Set(path string, cgroup *configs.Cgroup) error } var subsystems = map[string]subsystem{ "devices": &fs.DevicesGroup{}, "memory": &fs.MemoryGroup{}, "cpu": &fs.CpuGroup{}, "cpuset": &fs.CpusetGroup{}, "cpuacct": &fs.CpuacctGroup{}, "blkio": &fs.BlkioGroup{}, "perf_event": &fs.PerfEventGroup{}, "freezer": &fs.FreezerGroup{}, } const ( testScopeWait = 4 ) var ( connLock sync.Mutex theConn *systemd.Conn hasStartTransientUnit bool hasTransientDefaultDependencies bool ) func newProp(name string, units interface{}) systemd.Property { return systemd.Property{ Name: name, Value: dbus.MakeVariant(units), } } func UseSystemd() bool { s, err := os.Stat("/run/systemd/system") if err != nil || !s.IsDir() { return false } connLock.Lock() defer connLock.Unlock() if theConn == nil { var err error theConn, err = systemd.New() if err != nil { return false } // Assume we have StartTransientUnit hasStartTransientUnit = true // But if we get UnknownMethod error we don't if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil { if dbusError, ok := err.(dbus.Error); ok { if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" { hasStartTransientUnit = false return hasStartTransientUnit } } } // Ensure the scope name we use doesn't exist. Use the Pid to // avoid collisions between multiple libcontainer users on a // single host. 
// getIfaceForUnit picks an interface name from the suffix of unitName:
// "Scope" for names ending in ".scope", "Service" for names ending in
// ".service", and "Unit" for everything else.
func getIfaceForUnit(unitName string) string {
	switch {
	case strings.HasSuffix(unitName, ".scope"):
		return "Scope"
	case strings.HasSuffix(unitName, ".service"):
		return "Service"
	default:
		return "Unit"
	}
}
properties = append(properties, newProp("MemoryAccounting", true), newProp("CPUAccounting", true), newProp("BlockIOAccounting", true)) if hasTransientDefaultDependencies { properties = append(properties, newProp("DefaultDependencies", false)) } if c.Memory != 0 { properties = append(properties, newProp("MemoryLimit", uint64(c.Memory))) } // TODO: MemoryReservation and MemorySwap not available in systemd if c.CpuShares != 0 { properties = append(properties, newProp("CPUShares", uint64(c.CpuShares))) } if c.BlkioWeight != 0 { properties = append(properties, newProp("BlockIOWeight", uint64(c.BlkioWeight))) } if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { return err } if err := joinDevices(c, pid); err != nil { return err } // TODO: CpuQuota and CpuPeriod not available in systemd // we need to manually join the cpu.cfs_quota_us and cpu.cfs_period_us if err := joinCpu(c, pid); err != nil { return err } // -1 disables memorySwap if c.MemorySwap >= 0 && c.Memory != 0 { if err := joinMemory(c, pid); err != nil { return err } } // we need to manually join the freezer and cpuset cgroup in systemd // because it does not currently support it via the dbus api. 
if err := joinFreezer(c, pid); err != nil { return err } if err := joinCpuset(c, pid); err != nil { return err } paths := make(map[string]string) for sysname := range subsystems { subsystemPath, err := getSubsystemPath(m.Cgroups, sysname) if err != nil { // Don't fail if a cgroup hierarchy was not found, just skip this subsystem if cgroups.IsNotFound(err) { continue } return err } paths[sysname] = subsystemPath } m.Paths = paths return nil } func (m *Manager) Destroy() error { return cgroups.RemovePaths(m.Paths) } func (m *Manager) GetPaths() map[string]string { return m.Paths } func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { path, err := getSubsystemPath(c, subsystem) if err != nil { return "", err } if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { return "", err } if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil { return "", err } return path, nil } func joinCpu(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "cpu") if err != nil { return err } if c.CpuQuota != 0 { if err = ioutil.WriteFile(filepath.Join(path, "cpu.cfs_quota_us"), []byte(strconv.FormatInt(c.CpuQuota, 10)), 0700); err != nil { return err } } if c.CpuPeriod != 0 { if err = ioutil.WriteFile(filepath.Join(path, "cpu.cfs_period_us"), []byte(strconv.FormatInt(c.CpuPeriod, 10)), 0700); err != nil { return err } } return nil } func joinFreezer(c *configs.Cgroup, pid int) error { if _, err := join(c, "freezer", pid); err != nil { return err } return nil } func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { mountpoint, err := cgroups.FindCgroupMountpoint(subsystem) if err != nil { return "", err } initPath, err := cgroups.GetInitCgroupDir(subsystem) if err != nil { return "", err } slice := "system.slice" if c.Slice != "" { slice = c.Slice } return filepath.Join(mountpoint, 
initPath, slice, getUnitName(c)), nil } func (m *Manager) Freeze(state configs.FreezerState) error { path, err := getSubsystemPath(m.Cgroups, "freezer") if err != nil { return err } prevState := m.Cgroups.Freezer m.Cgroups.Freezer = state freezer := subsystems["freezer"] err = freezer.Set(path, m.Cgroups) if err != nil { m.Cgroups.Freezer = prevState return err } return nil } func (m *Manager) GetPids() ([]int, error) { path, err := getSubsystemPath(m.Cgroups, "cpu") if err != nil { return nil, err } return cgroups.ReadProcsFile(path) } func (m *Manager) GetStats() (*cgroups.Stats, error) { stats := cgroups.NewStats() for name, path := range m.Paths { sys, ok := subsystems[name] if !ok || !cgroups.PathExists(path) { continue } if err := sys.GetStats(path, stats); err != nil { return nil, err } } return stats, nil } func (m *Manager) Set(container *configs.Config) error { panic("not implemented") } func getUnitName(c *configs.Cgroup) string { return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) } // Atm we can't use the systemd device support because of two missing things: // * Support for wildcards to allow mknod on any device // * Support for wildcards to allow /dev/pts support // // The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is // in wide use. When both these are availalable we will be able to switch, but need to keep the old // implementation for backwards compat. // // Note: we can't use systemd to set up the initial limits, and then change the cgroup // because systemd will re-write the device settings if it needs to re-apply the cgroup context. // This happens at least for v208 when any sibling unit is started. func joinDevices(c *configs.Cgroup, pid int) error { path, err := join(c, "devices", pid) if err != nil { return err } devices := subsystems["devices"] if err := devices.Set(path, c); err != nil { return err } return nil } // Symmetrical public function to update device based cgroups. 
Also available // in the fs implementation. func ApplyDevices(c *configs.Cgroup, pid int) error { return joinDevices(c, pid) } func joinMemory(c *configs.Cgroup, pid int) error { memorySwap := c.MemorySwap if memorySwap == 0 { // By default, MemorySwap is set to twice the size of RAM. memorySwap = c.Memory * 2 } path, err := getSubsystemPath(c, "memory") if err != nil { return err } return ioutil.WriteFile(filepath.Join(path, "memory.memsw.limit_in_bytes"), []byte(strconv.FormatInt(memorySwap, 10)), 0700) } // systemd does not atm set up the cpuset controller, so we must manually // join it. Additionally that is a very finicky controller where each // level must have a full setup as the default for a new directory is "no cpus" func joinCpuset(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "cpuset") if err != nil { return err } s := &fs.CpusetGroup{} return s.ApplyDir(path, c, pid) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/systemd/apply_nosystemd.go0000644000175000017500000000215012524212370027107 0ustar tianontianon// +build !linux package systemd import ( "fmt" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type Manager struct { Cgroups *configs.Cgroup Paths map[string]string } func UseSystemd() bool { return false } func (m *Manager) Apply(pid int) error { return fmt.Errorf("Systemd not supported") } func (m *Manager) GetPids() ([]int, error) { return nil, fmt.Errorf("Systemd not supported") } func (m *Manager) Destroy() error { return fmt.Errorf("Systemd not supported") } func (m *Manager) GetPaths() map[string]string { return nil } func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, fmt.Errorf("Systemd not supported") } func (m *Manager) Set(container *configs.Config) error { return nil, fmt.Errorf("Systemd not supported") } func (m *Manager) Freeze(state configs.FreezerState) error { return fmt.Errorf("Systemd not supported") } func ApplyDevices(c *configs.Cgroup, 
pid int) error { return fmt.Errorf("Systemd not supported") } func Freeze(c *configs.Cgroup, state configs.FreezerState) error { return fmt.Errorf("Systemd not supported") } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/cgroups.go0000644000175000017500000000245712524212370023661 0ustar tianontianonpackage cgroups import ( "fmt" "github.com/docker/libcontainer/configs" ) type Manager interface { // Apply cgroup configuration to the process with the specified pid Apply(pid int) error // Returns the PIDs inside the cgroup set GetPids() ([]int, error) // Returns statistics for the cgroup set GetStats() (*Stats, error) // Toggles the freezer cgroup according with specified state Freeze(state configs.FreezerState) error // Destroys the cgroup set Destroy() error // NewCgroupManager() and LoadCgroupManager() require following attributes: // Paths map[string]string // Cgroups *cgroups.Cgroup // Paths maps cgroup subsystem to path at which it is mounted. // Cgroups specifies specific cgroup settings for the various subsystems // Returns cgroup paths to save in a state file and to be able to // restore the object later. GetPaths() map[string]string // Set the cgroup as configured. 
// NotFoundError reports that no mountpoint was found for a cgroup subsystem.
type NotFoundError struct {
	// Subsystem is the subsystem name included in the error message.
	Subsystem string
}

// Error implements the error interface.
func (e *NotFoundError) Error() string {
	const format = "mountpoint for %s not found"
	return fmt.Sprintf(format, e.Subsystem)
}

// NewNotFoundError returns a *NotFoundError for the subsystem sub, typed as
// a plain error.
func NewNotFoundError(sub string) error {
	notFound := new(NotFoundError)
	notFound.Subsystem = sub
	return notFound
}

// IsNotFound reports whether err has the dynamic type *NotFoundError.
// A nil error is never a not-found error.
func IsNotFound(err error) bool {
	switch err.(type) {
	case *NotFoundError:
		return true
	default:
		return false
	}
}
serviceBytesRecursiveContents = `8:0 Read 100 8:0 Write 200 8:0 Sync 300 8:0 Async 500 8:0 Total 500 Total 500` servicedRecursiveContents = `8:0 Read 10 8:0 Write 40 8:0 Sync 20 8:0 Async 30 8:0 Total 50 Total 50` queuedRecursiveContents = `8:0 Read 1 8:0 Write 4 8:0 Sync 2 8:0 Async 3 8:0 Total 5 Total 5` serviceTimeRecursiveContents = `8:0 Read 173959 8:0 Write 0 8:0 Sync 0 8:0 Async 173959 8:0 Total 17395 Total 17395` waitTimeRecursiveContents = `8:0 Read 15571 8:0 Write 0 8:0 Sync 0 8:0 Async 15571 8:0 Total 15571` mergedRecursiveContents = `8:0 Read 5 8:0 Write 10 8:0 Sync 0 8:0 Async 0 8:0 Total 15 Total 15` timeRecursiveContents = `8:0 8` throttleServiceBytes = `8:0 Read 11030528 8:0 Write 23 8:0 Sync 42 8:0 Async 11030528 8:0 Total 11030528 252:0 Read 11030528 252:0 Write 23 252:0 Sync 42 252:0 Async 11030528 252:0 Total 11030528 Total 22061056` throttleServiced = `8:0 Read 164 8:0 Write 23 8:0 Sync 42 8:0 Async 164 8:0 Total 164 252:0 Read 164 252:0 Write 23 252:0 Sync 42 252:0 Async 164 252:0 Total 164 Total 328` ) func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) { *blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op}) } func TestBlkioSetWeight(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() const ( weightBefore = 100 weightAfter = 200 ) helper.writeFileContents(map[string]string{ "blkio.weight": strconv.Itoa(weightBefore), }) helper.CgroupData.c.BlkioWeight = weightAfter blkio := &BlkioGroup{} if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "blkio.weight") if err != nil { t.Fatalf("Failed to parse blkio.weight - %s", err) } if value != weightAfter { t.Fatal("Got the wrong value, set blkio.weight failed.") } } func TestBlkioStats(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() 
helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } // Verify expected stats. expectedStats := cgroups.BlkioStats{} appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async") appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, 
"Read") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async") appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async") appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async") appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total") appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "") expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) } func TestBlkioStatsNoSectorsFile(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoServiceBytesFile(t *testing.T) 
{ helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoServicedFile(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoQueuedFile(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} 
actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoServiceTimeFile(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoWaitTimeFile(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoMergedFile(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": 
serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.time_recursive": timeRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsNoTimeFile(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatalf("Failed unexpectedly: %s", err) } } func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": "8:0 Read 100 100", "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := 
*cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected to fail, but did not") } } func TestBlkioStatsUnexpectedFieldType(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": "8:0 Read Write", "blkio.io_serviced_recursive": servicedRecursiveContents, "blkio.io_queued_recursive": queuedRecursiveContents, "blkio.sectors_recursive": sectorsRecursiveContents, "blkio.io_service_time_recursive": serviceTimeRecursiveContents, "blkio.io_wait_time_recursive": waitTimeRecursiveContents, "blkio.io_merged_recursive": mergedRecursiveContents, "blkio.time_recursive": timeRecursiveContents, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected to fail, but did not") } } func TestNonCFQBlkioStats(t *testing.T) { helper := NewCgroupTestUtil("blkio", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "blkio.io_service_bytes_recursive": "", "blkio.io_serviced_recursive": "", "blkio.io_queued_recursive": "", "blkio.sectors_recursive": "", "blkio.io_service_time_recursive": "", "blkio.io_wait_time_recursive": "", "blkio.io_merged_recursive": "", "blkio.time_recursive": "", "blkio.throttle.io_service_bytes": throttleServiceBytes, "blkio.throttle.io_serviced": throttleServiced, }) blkio := &BlkioGroup{} actualStats := *cgroups.NewStats() err := blkio.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } // Verify expected stats. 
expectedStats := cgroups.BlkioStats{} appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async") appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async") appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total") expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/stats_util_test.go0000644000175000017500000000604212524212370026033 0ustar tianontianonpackage fs import ( "fmt" "log" "testing" "github.com/docker/libcontainer/cgroups" ) func blkioStatEntryEquals(expected, actual 
[]cgroups.BlkioStatEntry) error { if len(expected) != len(actual) { return fmt.Errorf("blkioStatEntries length do not match") } for i, expValue := range expected { actValue := actual[i] if expValue != actValue { return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue) } } return nil } func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) { if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil { log.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil { log.Printf("blkio IoServicedRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil { log.Printf("blkio IoQueuedRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil { log.Printf("blkio SectorsRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil { log.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil { log.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err) t.Fail() } if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil { log.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive) t.Fail() } if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil { log.Printf("blkio IoTimeRecursive do not match - %s\n", err) t.Fail() } } func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) { if expected != actual { 
log.Printf("Expected throttling data %v but found %v\n", expected, actual) t.Fail() } } func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) { if expected.Usage != actual.Usage { log.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage) t.Fail() } if expected.MaxUsage != actual.MaxUsage { log.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage) t.Fail() } for key, expValue := range expected.Stats { actValue, ok := actual.Stats[key] if !ok { log.Printf("Expected memory stat key %s not found\n", key) t.Fail() } if expValue != actValue { log.Printf("Expected memory stat value %d but found %d\n", expValue, actValue) t.Fail() } } if expected.Failcnt != actual.Failcnt { log.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt) t.Fail() } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/apply_raw.go0000644000175000017500000001331512524212370024600 0ustar tianontianonpackage fs import ( "io/ioutil" "os" "path/filepath" "strconv" "sync" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) var ( subsystems = map[string]subsystem{ "devices": &DevicesGroup{}, "memory": &MemoryGroup{}, "cpu": &CpuGroup{}, "cpuset": &CpusetGroup{}, "cpuacct": &CpuacctGroup{}, "blkio": &BlkioGroup{}, "perf_event": &PerfEventGroup{}, "freezer": &FreezerGroup{}, } CgroupProcesses = "cgroup.procs" ) type subsystem interface { // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Removes the cgroup represented by 'data'. Remove(*data) error // Creates and joins the cgroup represented by data. Apply(*data) error // Set the cgroup represented by cgroup. Set(path string, cgroup *configs.Cgroup) error } type Manager struct { Cgroups *configs.Cgroup Paths map[string]string } // The absolute path to the root of the cgroup hierarchies. 
var cgroupRootLock sync.Mutex var cgroupRoot string // Gets the cgroupRoot. func getCgroupRoot() (string, error) { cgroupRootLock.Lock() defer cgroupRootLock.Unlock() if cgroupRoot != "" { return cgroupRoot, nil } root, err := cgroups.FindCgroupMountpointDir() if err != nil { return "", err } if _, err := os.Stat(root); err != nil { return "", err } cgroupRoot = root return cgroupRoot, nil } type data struct { root string cgroup string c *configs.Cgroup pid int } func (m *Manager) Apply(pid int) error { if m.Cgroups == nil { return nil } d, err := getCgroupData(m.Cgroups, pid) if err != nil { return err } paths := make(map[string]string) defer func() { if err != nil { cgroups.RemovePaths(paths) } }() for name, sys := range subsystems { if err := sys.Apply(d); err != nil { return err } // TODO: Apply should, ideally, be reentrant or be broken up into a separate // create and join phase so that the cgroup hierarchy for a container can be // created then join consists of writing the process pids to cgroup.procs p, err := d.path(name) if err != nil { if cgroups.IsNotFound(err) { continue } return err } paths[name] = p } m.Paths = paths return nil } func (m *Manager) Destroy() error { return cgroups.RemovePaths(m.Paths) } func (m *Manager) GetPaths() map[string]string { return m.Paths } // Symmetrical public function to update device based cgroups. Also available // in the systemd implementation. 
func ApplyDevices(c *configs.Cgroup, pid int) error { d, err := getCgroupData(c, pid) if err != nil { return err } devices := subsystems["devices"] return devices.Apply(d) } func (m *Manager) GetStats() (*cgroups.Stats, error) { stats := cgroups.NewStats() for name, path := range m.Paths { sys, ok := subsystems[name] if !ok || !cgroups.PathExists(path) { continue } if err := sys.GetStats(path, stats); err != nil { return nil, err } } return stats, nil } func (m *Manager) Set(container *configs.Config) error { for name, path := range m.Paths { sys, ok := subsystems[name] if !ok || !cgroups.PathExists(path) { continue } if err := sys.Set(path, container.Cgroups); err != nil { return err } } return nil } // Freeze toggles the container's freezer cgroup depending on the state // provided func (m *Manager) Freeze(state configs.FreezerState) error { d, err := getCgroupData(m.Cgroups, 0) if err != nil { return err } dir, err := d.path("freezer") if err != nil { return err } prevState := m.Cgroups.Freezer m.Cgroups.Freezer = state freezer := subsystems["freezer"] err = freezer.Set(dir, m.Cgroups) if err != nil { m.Cgroups.Freezer = prevState return err } return nil } func (m *Manager) GetPids() ([]int, error) { d, err := getCgroupData(m.Cgroups, 0) if err != nil { return nil, err } dir, err := d.path("devices") if err != nil { return nil, err } return cgroups.ReadProcsFile(dir) } func getCgroupData(c *configs.Cgroup, pid int) (*data, error) { root, err := getCgroupRoot() if err != nil { return nil, err } cgroup := c.Name if c.Parent != "" { cgroup = filepath.Join(c.Parent, cgroup) } return &data{ root: root, cgroup: cgroup, c: c, pid: pid, }, nil } func (raw *data) parent(subsystem, mountpoint string) (string, error) { initPath, err := cgroups.GetInitCgroupDir(subsystem) if err != nil { return "", err } return filepath.Join(mountpoint, initPath), nil } func (raw *data) path(subsystem string) (string, error) { mnt, err := cgroups.FindCgroupMountpoint(subsystem) // If we 
didn't mount the subsystem, there is no point we make the path. if err != nil { return "", err } // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. if filepath.IsAbs(raw.cgroup) { return filepath.Join(raw.root, subsystem, raw.cgroup), nil } parent, err := raw.parent(subsystem, mnt) if err != nil { return "", err } return filepath.Join(parent, raw.cgroup), nil } func (raw *data) join(subsystem string) (string, error) { path, err := raw.path(subsystem) if err != nil { return "", err } if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { return "", err } if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil { return "", err } return path, nil } func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } func readFile(dir, file string) (string, error) { data, err := ioutil.ReadFile(filepath.Join(dir, file)) return string(data), err } func removePath(p string, err error) error { if err != nil { return err } if p != "" { return os.RemoveAll(p) } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/memory.go0000644000175000017500000000525512524212370024116 0ustar tianontianonpackage fs import ( "bufio" "fmt" "os" "path/filepath" "strconv" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type MemoryGroup struct { } func (s *MemoryGroup) Apply(d *data) error { dir, err := d.join("memory") // only return an error for memory if it was specified if err != nil && (d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0) { return err } defer func() { if err != nil { os.RemoveAll(dir) } }() if err := s.Set(dir, d.c); err != nil { return err } return nil } func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.Memory != 0 { if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Memory, 10)); err != nil { return err } } if 
cgroup.MemoryReservation != 0 { if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.MemoryReservation, 10)); err != nil { return err } } // By default, MemorySwap is set to twice the size of Memory. if cgroup.MemorySwap == 0 && cgroup.Memory != 0 { if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Memory*2, 10)); err != nil { return err } } if cgroup.MemorySwap > 0 { if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil { return err } } if cgroup.OomKillDisable { if err := writeFile(path, "memory.oom_control", "1"); err != nil { return err } } return nil } func (s *MemoryGroup) Remove(d *data) error { return removePath(d.path("memory")) } func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { // Set stats from memory.stat. statsFile, err := os.Open(filepath.Join(path, "memory.stat")) if err != nil { if os.IsNotExist(err) { return nil } return err } defer statsFile.Close() sc := bufio.NewScanner(statsFile) for sc.Scan() { t, v, err := getCgroupParamKeyValue(sc.Text()) if err != nil { return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) } stats.MemoryStats.Stats[t] = v } // Set memory usage and max historical usage. 
value, err := getCgroupParamUint(path, "memory.usage_in_bytes") if err != nil { return fmt.Errorf("failed to parse memory.usage_in_bytes - %v", err) } stats.MemoryStats.Usage = value value, err = getCgroupParamUint(path, "memory.max_usage_in_bytes") if err != nil { return fmt.Errorf("failed to parse memory.max_usage_in_bytes - %v", err) } stats.MemoryStats.MaxUsage = value value, err = getCgroupParamUint(path, "memory.failcnt") if err != nil { return fmt.Errorf("failed to parse memory.failcnt - %v", err) } stats.MemoryStats.Failcnt = value return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/memory_test.go0000644000175000017500000001600012524212370025143 0ustar tianontianonpackage fs import ( "strconv" "testing" "github.com/docker/libcontainer/cgroups" ) const ( memoryStatContents = `cache 512 rss 1024` memoryUsageContents = "2048\n" memoryMaxUsageContents = "4096\n" memoryFailcnt = "100\n" ) func TestMemorySetMemory(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( memoryBefore = 314572800 // 300M memoryAfter = 524288000 // 500M reservationBefore = 209715200 // 200M reservationAfter = 314572800 // 300M ) helper.writeFileContents(map[string]string{ "memory.limit_in_bytes": strconv.Itoa(memoryBefore), "memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore), }) helper.CgroupData.c.Memory = memoryAfter helper.CgroupData.c.MemoryReservation = reservationAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) } if value != memoryAfter { t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") } value, err = getCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err) } if value != 
reservationAfter { t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.") } } func TestMemorySetMemoryswap(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( memoryswapBefore = 314572800 // 300M memoryswapAfter = 524288000 // 500M ) helper.writeFileContents(map[string]string{ "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), }) helper.CgroupData.c.MemorySwap = memoryswapAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) } if value != memoryswapAfter { t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") } } func TestMemorySetMemoryswapDefault(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( memoryBefore = 209715200 // 200M memoryAfter = 314572800 // 300M memoryswapAfter = 629145600 // 300M*2 ) helper.writeFileContents(map[string]string{ "memory.limit_in_bytes": strconv.Itoa(memoryBefore), }) helper.CgroupData.c.Memory = memoryAfter memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) } if value != memoryswapAfter { t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") } } func TestMemoryStats(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, "memory.failcnt": memoryFailcnt, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := 
memory.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } expectedStats := cgroups.MemoryStats{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Stats: map[string]uint64{"cache": 512, "rss": 1024}} expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats) } func TestMemoryStatsNoStatFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } } func TestMemoryStatsNoUsageFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsNoMaxUsageFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": memoryUsageContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsBadStatFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": "rss rss", "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": memoryMaxUsageContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsBadUsageFile(t *testing.T) 
{ helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": "bad", "memory.max_usage_in_bytes": memoryMaxUsageContents, }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemoryStatsBadMaxUsageFile(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "memory.stat": memoryStatContents, "memory.usage_in_bytes": memoryUsageContents, "memory.max_usage_in_bytes": "bad", }) memory := &MemoryGroup{} actualStats := *cgroups.NewStats() err := memory.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failure") } } func TestMemorySetOomControl(t *testing.T) { helper := NewCgroupTestUtil("memory", t) defer helper.cleanup() const ( oom_kill_disable = 1 // disable oom killer, default is 0 ) helper.writeFileContents(map[string]string{ "memory.oom_control": strconv.Itoa(oom_kill_disable), }) memory := &MemoryGroup{} if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control") if err != nil { t.Fatalf("Failed to parse memory.oom_control - %s", err) } if value != oom_kill_disable { t.Fatalf("Got the wrong value, set memory.oom_control failed.") } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/cpu.go0000644000175000017500000000320112524212370023362 0ustar tianontianonpackage fs import ( "bufio" "os" "path/filepath" "strconv" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type CpuGroup struct { } func (s *CpuGroup) Apply(d *data) error { // We always want to join the cpu group, to allow fair cpu scheduling // on a container basis dir, err := d.join("cpu") if err != nil { return err } if err := 
s.Set(dir, d.c); err != nil { return err } return nil } func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.CpuShares != 0 { if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.CpuShares, 10)); err != nil { return err } } if cgroup.CpuPeriod != 0 { if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.CpuPeriod, 10)); err != nil { return err } } if cgroup.CpuQuota != 0 { if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.CpuQuota, 10)); err != nil { return err } } return nil } func (s *CpuGroup) Remove(d *data) error { return removePath(d.path("cpu")) } func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { f, err := os.Open(filepath.Join(path, "cpu.stat")) if err != nil { if os.IsNotExist(err) { return nil } return err } defer f.Close() sc := bufio.NewScanner(f) for sc.Scan() { t, v, err := getCgroupParamKeyValue(sc.Text()) if err != nil { return err } switch t { case "nr_periods": stats.CpuStats.ThrottlingData.Periods = v case "nr_throttled": stats.CpuStats.ThrottlingData.ThrottledPeriods = v case "throttled_time": stats.CpuStats.ThrottlingData.ThrottledTime = v } } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/cpuset.go0000644000175000017500000000613512524212370024107 0ustar tianontianonpackage fs import ( "bytes" "io/ioutil" "os" "path/filepath" "strconv" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type CpusetGroup struct { } func (s *CpusetGroup) Apply(d *data) error { dir, err := d.path("cpuset") if err != nil { return err } return s.ApplyDir(dir, d.c, d.pid) } func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.CpusetCpus != "" { if err := writeFile(path, "cpuset.cpus", cgroup.CpusetCpus); err != nil { return err } } if cgroup.CpusetMems != "" { if err := writeFile(path, "cpuset.mems", cgroup.CpusetMems); err != nil { return err } } return nil } func (s *CpusetGroup) 
Remove(d *data) error { return removePath(d.path("cpuset")) } func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { if err := s.ensureParent(dir); err != nil { return err } // because we are not using d.join we need to place the pid into the procs file // unlike the other subsystems if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil { return err } // the default values inherit from parent cgroup are already set in // s.ensureParent, cover these if we have our own if err := s.Set(dir, cgroup); err != nil { return err } return nil } func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { return } if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { return } return cpus, mems, nil } // ensureParent ensures that the parent directory of current is created // with the proper cpus and mems files copied from it's parent if the values // are a file with a new line char func (s *CpusetGroup) ensureParent(current string) error { parent := filepath.Dir(current) if _, err := os.Stat(parent); err != nil { if !os.IsNotExist(err) { return err } if err := s.ensureParent(parent); err != nil { return err } } if err := os.MkdirAll(current, 0755); err != nil && !os.IsExist(err) { return err } return s.copyIfNeeded(current, parent) } // copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent // directory to the current directory if the file's contents are 0 func (s *CpusetGroup) copyIfNeeded(current, parent string) error { var ( err error currentCpus, currentMems []byte parentCpus, parentMems []byte ) if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { return err } if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { return err } if 
s.isEmpty(currentCpus) { if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { return err } } if s.isEmpty(currentMems) { if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { return err } } return nil } func (s *CpusetGroup) isEmpty(b []byte) bool { return len(bytes.Trim(b, "\n")) == 0 } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/perf_event.go0000644000175000017500000000117712524212370024742 0ustar tianontianonpackage fs import ( "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type PerfEventGroup struct { } func (s *PerfEventGroup) Apply(d *data) error { // we just want to join this group even though we don't set anything if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error { return nil } func (s *PerfEventGroup) Remove(d *data) error { return removePath(d.path("perf_event")) } func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/cpuacct.go0000644000175000017500000000574412524212370024233 0ustar tianontianonpackage fs import ( "fmt" "io/ioutil" "path/filepath" "strconv" "strings" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/system" ) const ( cgroupCpuacctStat = "cpuacct.stat" nanosecondsInSecond = 1000000000 ) var clockTicks = uint64(system.GetClockTicks()) type CpuacctGroup struct { } func (s *CpuacctGroup) Apply(d *data) error { // we just want to join this group even though we don't set anything if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) { return err } return nil } func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error { return nil } func (s *CpuacctGroup) Remove(d *data) error { return removePath(d.path("cpuacct")) } func (s 
*CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) if err != nil { return err } totalUsage, err := getCgroupParamUint(path, "cpuacct.usage") if err != nil { return err } percpuUsage, err := getPercpuUsage(path) if err != nil { return err } stats.CpuStats.CpuUsage.TotalUsage = totalUsage stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage return nil } // Returns user and kernel usage breakdown in nanoseconds. func getCpuUsageBreakdown(path string) (uint64, uint64, error) { userModeUsage := uint64(0) kernelModeUsage := uint64(0) const ( userField = "user" systemField = "system" ) // Expected format: // user // system data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat)) if err != nil { return 0, 0, err } fields := strings.Fields(string(data)) if len(fields) != 4 { return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat)) } if fields[0] != userField { return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField) } if fields[2] != systemField { return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField) } if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { return 0, 0, err } if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { return 0, 0, err } return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil } func getPercpuUsage(path string) ([]uint64, error) { percpuUsage := []uint64{} data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu")) if err != nil { return percpuUsage, err } for _, value := range strings.Fields(string(data)) { value, err := strconv.ParseUint(value, 10, 64) if err != nil { return 
percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) } percpuUsage = append(percpuUsage, value) } return percpuUsage, nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/devices.go0000644000175000017500000000152412524212370024223 0ustar tianontianonpackage fs import ( "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type DevicesGroup struct { } func (s *DevicesGroup) Apply(d *data) error { dir, err := d.join("devices") if err != nil { return err } if err := s.Set(dir, d.c); err != nil { return err } return nil } func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { if !cgroup.AllowAllDevices { if err := writeFile(path, "devices.deny", "a"); err != nil { return err } for _, dev := range cgroup.AllowedDevices { if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { return err } } } return nil } func (s *DevicesGroup) Remove(d *data) error { return removePath(d.path("devices")) } func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/cpu_test.go0000644000175000017500000000634212524212370024432 0ustar tianontianonpackage fs import ( "fmt" "strconv" "testing" "github.com/docker/libcontainer/cgroups" ) func TestCpuSetShares(t *testing.T) { helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() const ( sharesBefore = 1024 sharesAfter = 512 ) helper.writeFileContents(map[string]string{ "cpu.shares": strconv.Itoa(sharesBefore), }) helper.CgroupData.c.CpuShares = sharesAfter cpu := &CpuGroup{} if err := cpu.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamUint(helper.CgroupPath, "cpu.shares") if err != nil { t.Fatalf("Failed to parse cpu.shares - %s", err) } if value != sharesAfter { t.Fatal("Got the wrong value, set cpu.shares failed.") } } func TestCpuSetBandWidth(t *testing.T) { helper := 
NewCgroupTestUtil("cpu", t) defer helper.cleanup() const ( quotaBefore = 8000 quotaAfter = 5000 periodBefore = 10000 periodAfter = 7000 ) helper.writeFileContents(map[string]string{ "cpu.cfs_quota_us": strconv.Itoa(quotaBefore), "cpu.cfs_period_us": strconv.Itoa(periodBefore), }) helper.CgroupData.c.CpuQuota = quotaAfter helper.CgroupData.c.CpuPeriod = periodAfter cpu := &CpuGroup{} if err := cpu.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } quota, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us") if err != nil { t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err) } if quota != quotaAfter { t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.") } period, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us") if err != nil { t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err) } if period != periodAfter { t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.") } } func TestCpuStats(t *testing.T) { helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() const ( kNrPeriods = 2000 kNrThrottled = 200 kThrottledTime = uint64(18446744073709551615) ) cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n", kNrPeriods, kNrThrottled, kThrottledTime) helper.writeFileContents(map[string]string{ "cpu.stat": cpuStatContent, }) cpu := &CpuGroup{} actualStats := *cgroups.NewStats() err := cpu.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal(err) } expectedStats := cgroups.ThrottlingData{ Periods: kNrPeriods, ThrottledPeriods: kNrThrottled, ThrottledTime: kThrottledTime} expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData) } func TestNoCpuStatFile(t *testing.T) { helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() cpu := &CpuGroup{} actualStats := *cgroups.NewStats() err := cpu.GetStats(helper.CgroupPath, &actualStats) if err != nil { t.Fatal("Expected not to fail, but did") } } func TestInvalidCpuStat(t *testing.T) { 
helper := NewCgroupTestUtil("cpu", t) defer helper.cleanup() cpuStatContent := `nr_periods 2000 nr_throttled 200 throttled_time fortytwo` helper.writeFileContents(map[string]string{ "cpu.stat": cpuStatContent, }) cpu := &CpuGroup{} actualStats := *cgroups.NewStats() err := cpu.GetStats(helper.CgroupPath, &actualStats) if err == nil { t.Fatal("Expected failed stat parsing.") } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/devices_test.go0000644000175000017500000000165312524212370025265 0ustar tianontianonpackage fs import ( "testing" "github.com/docker/libcontainer/configs" ) var ( allowedDevices = []*configs.Device{ { Path: "/dev/zero", Type: 'c', Major: 1, Minor: 5, Permissions: "rwm", FileMode: 0666, }, } allowedList = "c 1:5 rwm" ) func TestDevicesSetAllow(t *testing.T) { helper := NewCgroupTestUtil("devices", t) defer helper.cleanup() helper.writeFileContents(map[string]string{ "devices.deny": "a", }) helper.CgroupData.c.AllowAllDevices = false helper.CgroupData.c.AllowedDevices = allowedDevices devices := &DevicesGroup{} if err := devices.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "devices.allow") if err != nil { t.Fatalf("Failed to parse devices.allow - %s", err) } if value != allowedList { t.Fatal("Got the wrong value, set devices.allow failed.") } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/blkio.go0000644000175000017500000001111412524212370023675 0ustar tianontianonpackage fs import ( "bufio" "fmt" "os" "path/filepath" "strconv" "strings" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type BlkioGroup struct { } func (s *BlkioGroup) Apply(d *data) error { dir, err := d.join("blkio") if err != nil && !cgroups.IsNotFound(err) { return err } if err := s.Set(dir, d.c); err != nil { return err } return nil } func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { if cgroup.BlkioWeight 
!= 0 { if err := writeFile(path, "blkio.weight", strconv.FormatInt(cgroup.BlkioWeight, 10)); err != nil { return err } } return nil } func (s *BlkioGroup) Remove(d *data) error { return removePath(d.path("blkio")) } /* examples: blkio.sectors 8:0 6792 blkio.io_service_bytes 8:0 Read 1282048 8:0 Write 2195456 8:0 Sync 2195456 8:0 Async 1282048 8:0 Total 3477504 Total 3477504 blkio.io_serviced 8:0 Read 124 8:0 Write 104 8:0 Sync 104 8:0 Async 124 8:0 Total 228 Total 228 blkio.io_queued 8:0 Read 0 8:0 Write 0 8:0 Sync 0 8:0 Async 0 8:0 Total 0 Total 0 */ func splitBlkioStatLine(r rune) bool { return r == ' ' || r == ':' } func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) { var blkioStats []cgroups.BlkioStatEntry f, err := os.Open(path) if err != nil { if os.IsNotExist(err) { return blkioStats, nil } return nil, err } defer f.Close() sc := bufio.NewScanner(f) for sc.Scan() { // format: dev type amount fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) if len(fields) < 3 { if len(fields) == 2 && fields[0] == "Total" { // skip total line continue } else { return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) } } v, err := strconv.ParseUint(fields[0], 10, 64) if err != nil { return nil, err } major := v v, err = strconv.ParseUint(fields[1], 10, 64) if err != nil { return nil, err } minor := v op := "" valueField := 2 if len(fields) == 4 { op = fields[2] valueField = 3 } v, err = strconv.ParseUint(fields[valueField], 10, 64) if err != nil { return nil, err } blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) } return blkioStats, nil } func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { // Try to read CFQ stats available on all CFQ enabled kernels first if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil { return getCFQStats(path, stats) } return getStats(path, stats) // Use generic 
stats as fallback } func getCFQStats(path string, stats *cgroups.Stats) error { var blkioStats []cgroups.BlkioStatEntry var err error if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil { return err } stats.BlkioStats.SectorsRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil { return err } stats.BlkioStats.IoServiceBytesRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil { return err } stats.BlkioStats.IoServicedRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil { return err } stats.BlkioStats.IoQueuedRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil { return err } stats.BlkioStats.IoServiceTimeRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil { return err } stats.BlkioStats.IoWaitTimeRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil { return err } stats.BlkioStats.IoMergedRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil { return err } stats.BlkioStats.IoTimeRecursive = blkioStats return nil } func getStats(path string, stats *cgroups.Stats) error { var blkioStats []cgroups.BlkioStatEntry var err error if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil { return err } stats.BlkioStats.IoServiceBytesRecursive = blkioStats if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil { return err } stats.BlkioStats.IoServicedRecursive = blkioStats return nil } 
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/util_test.go0000644000175000017500000000255512524212370024622 0ustar tianontianon/* Utility for testing cgroup operations. Creates a mock of the cgroup filesystem for the duration of the test. */ package fs import ( "io/ioutil" "os" "path/filepath" "testing" "github.com/docker/libcontainer/configs" ) type cgroupTestUtil struct { // data to use in tests. CgroupData *data // Path to the mock cgroup directory. CgroupPath string // Temporary directory to store mock cgroup filesystem. tempDir string t *testing.T } // Creates a new test util for the specified subsystem func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil { d := &data{ c: &configs.Cgroup{}, } tempDir, err := ioutil.TempDir("", "cgroup_test") if err != nil { t.Fatal(err) } d.root = tempDir testCgroupPath := filepath.Join(d.root, subsystem) if err != nil { t.Fatal(err) } // Ensure the full mock cgroup path exists. err = os.MkdirAll(testCgroupPath, 0755) if err != nil { t.Fatal(err) } return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t} } func (c *cgroupTestUtil) cleanup() { os.RemoveAll(c.tempDir) } // Write the specified contents on the mock of the specified cgroup files. func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) { for file, contents := range fileContents { err := writeFile(c.CgroupPath, file, contents) if err != nil { c.t.Fatal(err) } } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/utils_test.go0000644000175000017500000000402212524212370024774 0ustar tianontianonpackage fs import ( "io/ioutil" "math" "os" "path/filepath" "strconv" "testing" ) const ( cgroupFile = "cgroup.file" floatValue = 2048.0 floatString = "2048" ) func TestGetCgroupParamsInt(t *testing.T) { // Setup tempdir. 
tempDir, err := ioutil.TempDir("", "cgroup_utils_test") if err != nil { t.Fatal(err) } defer os.RemoveAll(tempDir) tempFile := filepath.Join(tempDir, cgroupFile) // Success. err = ioutil.WriteFile(tempFile, []byte(floatString), 0755) if err != nil { t.Fatal(err) } value, err := getCgroupParamUint(tempDir, cgroupFile) if err != nil { t.Fatal(err) } else if value != floatValue { t.Fatalf("Expected %d to equal %f", value, floatValue) } // Success with new line. err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755) if err != nil { t.Fatal(err) } value, err = getCgroupParamUint(tempDir, cgroupFile) if err != nil { t.Fatal(err) } else if value != floatValue { t.Fatalf("Expected %d to equal %f", value, floatValue) } // Success with negative values err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755) if err != nil { t.Fatal(err) } value, err = getCgroupParamUint(tempDir, cgroupFile) if err != nil { t.Fatal(err) } else if value != 0 { t.Fatalf("Expected %d to equal %d", value, 0) } // Success with negative values lesser than min int64 s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64) err = ioutil.WriteFile(tempFile, []byte(s), 0755) if err != nil { t.Fatal(err) } value, err = getCgroupParamUint(tempDir, cgroupFile) if err != nil { t.Fatal(err) } else if value != 0 { t.Fatalf("Expected %d to equal %d", value, 0) } // Not a float. err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755) if err != nil { t.Fatal(err) } _, err = getCgroupParamUint(tempDir, cgroupFile) if err == nil { t.Fatal("Expecting error, got none") } // Unknown file. 
err = os.Remove(tempFile) if err != nil { t.Fatal(err) } _, err = getCgroupParamUint(tempDir, cgroupFile) if err == nil { t.Fatal("Expecting error, got none") } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/cpuset_test.go0000644000175000017500000000242612524212370025145 0ustar tianontianonpackage fs import ( "testing" ) func TestCpusetSetCpus(t *testing.T) { helper := NewCgroupTestUtil("cpuset", t) defer helper.cleanup() const ( cpusBefore = "0" cpusAfter = "1-3" ) helper.writeFileContents(map[string]string{ "cpuset.cpus": cpusBefore, }) helper.CgroupData.c.CpusetCpus = cpusAfter cpuset := &CpusetGroup{} if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "cpuset.cpus") if err != nil { t.Fatalf("Failed to parse cpuset.cpus - %s", err) } if value != cpusAfter { t.Fatal("Got the wrong value, set cpuset.cpus failed.") } } func TestCpusetSetMems(t *testing.T) { helper := NewCgroupTestUtil("cpuset", t) defer helper.cleanup() const ( memsBefore = "0" memsAfter = "1" ) helper.writeFileContents(map[string]string{ "cpuset.mems": memsBefore, }) helper.CgroupData.c.CpusetMems = memsAfter cpuset := &CpusetGroup{} if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { t.Fatal(err) } value, err := getCgroupParamString(helper.CgroupPath, "cpuset.mems") if err != nil { t.Fatalf("Failed to parse cpuset.mems - %s", err) } if value != memsAfter { t.Fatal("Got the wrong value, set cpuset.mems failed.") } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/utils.go0000644000175000017500000000370112524212370023740 0ustar tianontianonpackage fs import ( "errors" "fmt" "io/ioutil" "path/filepath" "strconv" "strings" ) var ( ErrNotSupportStat = errors.New("stats are not supported for subsystem") ErrNotValidFormat = errors.New("line is not a valid key value format") ) // Saturates negative values at zero and returns a uint64. 
// Due to kernel bugs, some of the memory cgroup stats can be negative. func parseUint(s string, base, bitSize int) (uint64, error) { value, err := strconv.ParseUint(s, base, bitSize) if err != nil { intValue, intErr := strconv.ParseInt(s, base, bitSize) // 1. Handle negative values greater than MinInt64 (and) // 2. Handle negative values lesser than MinInt64 if intErr == nil && intValue < 0 { return 0, nil } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { return 0, nil } return value, err } return value, nil } // Parses a cgroup param and returns as name, value // i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234 func getCgroupParamKeyValue(t string) (string, uint64, error) { parts := strings.Fields(t) switch len(parts) { case 2: value, err := parseUint(parts[1], 10, 64) if err != nil { return "", 0, fmt.Errorf("Unable to convert param value (%q) to uint64: %v", parts[1], err) } return parts[0], value, nil default: return "", 0, ErrNotValidFormat } } // Gets a single uint64 value from the specified cgroup file. 
func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) { contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) if err != nil { return 0, err } return parseUint(strings.TrimSpace(string(contents)), 10, 64) } // Gets a string value from the specified cgroup file func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) { contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) if err != nil { return "", err } return strings.TrimSpace(string(contents)), nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/fs/freezer.go0000644000175000017500000000220212524212370024235 0ustar tianontianonpackage fs import ( "fmt" "strings" "time" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type FreezerGroup struct { } func (s *FreezerGroup) Apply(d *data) error { dir, err := d.join("freezer") if err != nil && !cgroups.IsNotFound(err) { return err } if err := s.Set(dir, d.c); err != nil { return err } return nil } func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { switch cgroup.Freezer { case configs.Frozen, configs.Thawed: if err := writeFile(path, "freezer.state", string(cgroup.Freezer)); err != nil { return err } for { state, err := readFile(path, "freezer.state") if err != nil { return err } if strings.TrimSpace(state) == string(cgroup.Freezer) { break } time.Sleep(1 * time.Millisecond) } case configs.Undefined: return nil default: return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Freezer)) } return nil } func (s *FreezerGroup) Remove(d *data) error { return removePath(d.path("freezer")) } func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/cgroups/utils.go0000644000175000017500000001123612524212370023332 0ustar tianontianonpackage cgroups import ( "bufio" "fmt" "io" "io/ioutil" "os" "path/filepath" "strconv" "strings" "time" 
"github.com/docker/docker/pkg/mount" ) // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt func FindCgroupMountpoint(subsystem string) (string, error) { mounts, err := mount.GetMounts() if err != nil { return "", err } for _, mount := range mounts { if mount.Fstype == "cgroup" { for _, opt := range strings.Split(mount.VfsOpts, ",") { if opt == subsystem { return mount.Mountpoint, nil } } } } return "", NewNotFoundError(subsystem) } func FindCgroupMountpointDir() (string, error) { mounts, err := mount.GetMounts() if err != nil { return "", err } for _, mount := range mounts { if mount.Fstype == "cgroup" { return filepath.Dir(mount.Mountpoint), nil } } return "", NewNotFoundError("cgroup") } type Mount struct { Mountpoint string Subsystems []string } func (m Mount) GetThisCgroupDir() (string, error) { if len(m.Subsystems) == 0 { return "", fmt.Errorf("no subsystem for mount") } return GetThisCgroupDir(m.Subsystems[0]) } func GetCgroupMounts() ([]Mount, error) { mounts, err := mount.GetMounts() if err != nil { return nil, err } all, err := GetAllSubsystems() if err != nil { return nil, err } allMap := make(map[string]bool) for _, s := range all { allMap[s] = true } res := []Mount{} for _, mount := range mounts { if mount.Fstype == "cgroup" { m := Mount{Mountpoint: mount.Mountpoint} for _, opt := range strings.Split(mount.VfsOpts, ",") { if strings.HasPrefix(opt, "name=") { m.Subsystems = append(m.Subsystems, opt) } if allMap[opt] { m.Subsystems = append(m.Subsystems, opt) } } res = append(res, m) } } return res, nil } // Returns all the cgroup subsystems supported by the kernel func GetAllSubsystems() ([]string, error) { f, err := os.Open("/proc/cgroups") if err != nil { return nil, err } defer f.Close() subsystems := []string{} s := bufio.NewScanner(f) for s.Scan() { if err := s.Err(); err != nil { return nil, err } text := s.Text() if text[0] != '#' { parts := strings.Fields(text) if len(parts) >= 4 && parts[3] != "0" { subsystems = append(subsystems, 
parts[0]) } } } return subsystems, nil } // Returns the relative path to the cgroup docker is running in. func GetThisCgroupDir(subsystem string) (string, error) { f, err := os.Open("/proc/self/cgroup") if err != nil { return "", err } defer f.Close() return ParseCgroupFile(subsystem, f) } func GetInitCgroupDir(subsystem string) (string, error) { f, err := os.Open("/proc/1/cgroup") if err != nil { return "", err } defer f.Close() return ParseCgroupFile(subsystem, f) } func ReadProcsFile(dir string) ([]int, error) { f, err := os.Open(filepath.Join(dir, "cgroup.procs")) if err != nil { return nil, err } defer f.Close() var ( s = bufio.NewScanner(f) out = []int{} ) for s.Scan() { if t := s.Text(); t != "" { pid, err := strconv.Atoi(t) if err != nil { return nil, err } out = append(out, pid) } } return out, nil } func ParseCgroupFile(subsystem string, r io.Reader) (string, error) { s := bufio.NewScanner(r) for s.Scan() { if err := s.Err(); err != nil { return "", err } text := s.Text() parts := strings.Split(text, ":") for _, subs := range strings.Split(parts[1], ",") { if subs == subsystem { return parts[2], nil } } } return "", NewNotFoundError(subsystem) } func PathExists(path string) bool { if _, err := os.Stat(path); err != nil { return false } return true } func EnterPid(cgroupPaths map[string]string, pid int) error { for _, path := range cgroupPaths { if PathExists(path) { if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { return err } } } return nil } // RemovePaths iterates over the provided paths removing them. // We trying to remove all paths five times with increasing delay between tries. // If after all there are not removed cgroups - appropriate error will be // returned. 
func RemovePaths(paths map[string]string) (err error) { delay := 10 * time.Millisecond for i := 0; i < 5; i++ { if i != 0 { time.Sleep(delay) delay *= 2 } for s, p := range paths { os.RemoveAll(p) // TODO: here probably should be logging _, err := os.Stat(p) // We need this strange way of checking cgroups existence because // RemoveAll almost always returns error, even on already removed // cgroups if os.IsNotExist(err) { delete(paths, s) } } if len(paths) == 0 { return nil } } return fmt.Errorf("Failed to remove paths: %s", paths) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/notify_linux.go0000644000175000017500000000301212524212370023230 0ustar tianontianon// +build linux package libcontainer import ( "fmt" "io/ioutil" "os" "path/filepath" "syscall" ) const oomCgroupName = "memory" // notifyOnOOM returns channel on which you can expect event about OOM, // if process died without OOM this channel will be closed. // s is current *libcontainer.State for container. func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) { dir := paths[oomCgroupName] if dir == "" { return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName) } oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control")) if err != nil { return nil, err } fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0) if syserr != 0 { oomControl.Close() return nil, syserr } eventfd := os.NewFile(fd, "eventfd") eventControlPath := filepath.Join(dir, "cgroup.event_control") data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd()) if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil { eventfd.Close() oomControl.Close() return nil, err } ch := make(chan struct{}) go func() { defer func() { close(ch) eventfd.Close() oomControl.Close() }() buf := make([]byte, 8) for { if _, err := eventfd.Read(buf); err != nil { return } // When a cgroup is destroyed, an event is sent to eventfd. 
// So if the control path is gone, return instead of notifying. if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { return } ch <- struct{}{} } }() return ch, nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/SPEC.md0000644000175000017500000003172512524212370021242 0ustar tianontianon## Container Specification - v1 This is the standard configuration for version 1 containers. It includes namespaces, standard filesystem setup, a default Linux capability set, and information about resource reservations. It also has information about any populated environment settings for the processes running inside a container. Along with the configuration of how a container is created the standard also discusses actions that can be performed on a container to manage and inspect information about the processes running inside. The v1 profile is meant to be able to accommodate the majority of applications with a strong security configuration. ### System Requirements and Compatibility Minimum requirements: * Kernel version - 3.8 recommended 2.6.2x minimum(with backported patches) * Mounted cgroups with each subsystem in its own hierarchy ### Namespaces | Flag | Enabled | | ------------ | ------- | | CLONE_NEWPID | 1 | | CLONE_NEWUTS | 1 | | CLONE_NEWIPC | 1 | | CLONE_NEWNET | 1 | | CLONE_NEWNS | 1 | | CLONE_NEWUSER | 0 | In v1 the user namespace is not enabled by default for support of older kernels where the user namespace feature is not fully implemented. Namespaces are created for the container via the `clone` syscall. ### Filesystem A root filesystem must be provided to a container for execution. The container will use this root filesystem (rootfs) to jail and spawn processes inside where the binaries and system libraries are local to that directory. Any binaries to be executed must be contained within this rootfs. 
Mounts that happen inside the container are automatically cleaned up when the container exits as the mount namespace is destroyed and the kernel will unmount all the mounts that were set up within that namespace. For a container to execute properly there are certain filesystems that are required to be mounted within the rootfs that the runtime will set up. | Path | Type | Flags | Data | | ----------- | ------ | -------------------------------------- | --------------------------------------- | | /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | | /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | | /dev/shm | shm | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | | /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | | /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 | | /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | After a container's filesystems are mounted within the newly created mount namespace `/dev` will need to be populated with a set of device nodes. It is expected that a rootfs does not need to have any device nodes specified for `/dev` within the rootfs as the container will set up the correct devices that are required for executing a container's process. | Path | Mode | Access | | ------------ | ---- | ---------- | | /dev/null | 0666 | rwm | | /dev/zero | 0666 | rwm | | /dev/full | 0666 | rwm | | /dev/tty | 0666 | rwm | | /dev/random | 0666 | rwm | | /dev/urandom | 0666 | rwm | | /dev/fuse | 0666 | rwm | **ptmx** `/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within the container. The use of a pseudo TTY is optional within a container and it should support both. If a pseudo TTY is provided to the container `/dev/console` will need to be set up by binding the console in `/dev/` after it has been populated and mounted in tmpfs.
| Source | Destination | UID GID | Mode | Type | | --------------- | ------------ | ------- | ---- | ---- | | *pty host path* | /dev/console | 0 0 | 0600 | bind | After `/dev/null` has been set up we check for any external links between the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing to `/dev/null` outside the container we close and `dup2` the `/dev/null` that is local to the container's rootfs. After the container has `/proc` mounted a few standard symlinks are set up within `/dev/` for the io. | Source | Destination | | ------------ | ----------- | | /proc/1/fd | /dev/fd | | /proc/1/fd/0 | /dev/stdin | | /proc/1/fd/1 | /dev/stdout | | /proc/1/fd/2 | /dev/stderr | A `pivot_root` is used to change the root for the process, effectively jailing the process inside the rootfs. ```c put_old = mkdir(...); pivot_root(rootfs, put_old); chdir("/"); unmount(put_old, MS_DETACH); rmdir(put_old); ``` For containers running with a rootfs inside `ramfs` a `MS_MOVE` combined with a `chroot` is required as `pivot_root` is not supported in `ramfs`. ```c mount(rootfs, "/", NULL, MS_MOVE, NULL); chroot("."); chdir("/"); ``` The `umask` is set back to `0022` after the filesystem setup has been completed. ### Resources Cgroups are used to handle resource allocation for containers. This includes system resources like cpu, memory, and device access. | Subsystem | Enabled | | ---------- | ------- | | devices | 1 | | memory | 1 | | cpu | 1 | | cpuacct | 1 | | cpuset | 1 | | blkio | 1 | | perf_event | 1 | | freezer | 1 | All cgroup subsystems are joined so that statistics can be collected from each of the subsystems. Freezer does not expose any stats but is joined so that containers can be paused and resumed. The parent process of the container's init must place the init pid inside the correct cgroups before the initialization begins. This is done so that no processes or threads escape the cgroups.
This sync is done via a pipe ( specified in the runtime section below ) that the container's init process will block on while waiting for the parent to finish setup. ### Security The standard set of Linux capabilities that are set in a container provide a good default for security and flexibility for the applications. | Capability | Enabled | | -------------------- | ------- | | CAP_NET_RAW | 1 | | CAP_NET_BIND_SERVICE | 1 | | CAP_AUDIT_WRITE | 1 | | CAP_DAC_OVERRIDE | 1 | | CAP_SETFCAP | 1 | | CAP_SETPCAP | 1 | | CAP_SETGID | 1 | | CAP_SETUID | 1 | | CAP_MKNOD | 1 | | CAP_CHOWN | 1 | | CAP_FOWNER | 1 | | CAP_FSETID | 1 | | CAP_KILL | 1 | | CAP_SYS_CHROOT | 1 | | CAP_NET_BROADCAST | 0 | | CAP_SYS_MODULE | 0 | | CAP_SYS_RAWIO | 0 | | CAP_SYS_PACCT | 0 | | CAP_SYS_ADMIN | 0 | | CAP_SYS_NICE | 0 | | CAP_SYS_RESOURCE | 0 | | CAP_SYS_TIME | 0 | | CAP_SYS_TTY_CONFIG | 0 | | CAP_AUDIT_CONTROL | 0 | | CAP_MAC_OVERRIDE | 0 | | CAP_MAC_ADMIN | 0 | | CAP_NET_ADMIN | 0 | | CAP_SYSLOG | 0 | | CAP_DAC_READ_SEARCH | 0 | | CAP_LINUX_IMMUTABLE | 0 | | CAP_IPC_LOCK | 0 | | CAP_IPC_OWNER | 0 | | CAP_SYS_PTRACE | 0 | | CAP_SYS_BOOT | 0 | | CAP_LEASE | 0 | | CAP_WAKE_ALARM | 0 | | CAP_BLOCK_SUSPEND | 0 | Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor) and [selinux](http://selinuxproject.org/page/Main_Page) can be used with the containers. A container should support setting an apparmor profile or selinux process and mount labels if provided in the configuration.
Standard apparmor profile: ```c #include <tunables/global> profile <profile_name> flags=(attach_disconnected,mediate_deleted) { #include <abstractions/base> network, capability, file, umount, deny @{PROC}/sys/fs/** wklx, deny @{PROC}/sysrq-trigger rwklx, deny @{PROC}/mem rwklx, deny @{PROC}/kmem rwklx, deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, deny @{PROC}/sys/kernel/*/** wklx, deny mount, deny /sys/[^f]*/** wklx, deny /sys/f[^s]*/** wklx, deny /sys/fs/[^c]*/** wklx, deny /sys/fs/c[^g]*/** wklx, deny /sys/fs/cg[^r]*/** wklx, deny /sys/firmware/efi/efivars/** rwklx, deny /sys/kernel/security/** rwklx, } ``` *TODO: seccomp work is being done to find a good default config* ### Runtime and Init Process During container creation the parent process needs to talk to the container's init process and have a form of synchronization. This is accomplished by creating a pipe that is passed to the container's init. When the init process first spawns it will block on its side of the pipe until the parent closes its side. This allows the parent to have time to set the new process inside a cgroup hierarchy and/or write any uid/gid mappings required for user namespaces. The pipe is passed to the init process via FD 3. The application consuming libcontainer should be compiled statically. libcontainer does not define any init process and the arguments provided are used to `exec` the process inside the application. There should be no long running init within the container spec. If a pseudo tty is provided to a container it will open and `dup2` the console as the container's STDIN, STDOUT, STDERR as well as mounting the console as `/dev/console`. An extra set of mounts are provided to a container and set up for use. A container's rootfs can contain some non-portable files inside that can cause side effects during execution of a process. These files are usually created and populated with the container-specific information via the runtime.
**Extra runtime files:** * /etc/hosts * /etc/resolv.conf * /etc/hostname * /etc/localtime #### Defaults There are a few defaults that can be overridden by users, but in their omission these apply to processes within a container. | Type | Value | | ------------------- | ------------------------------ | | Parent Death Signal | SIGKILL | | UID | 0 | | GID | 0 | | GROUPS | 0, NULL | | CWD | "/" | | $HOME | Current user's home dir or "/" | | Readonly rootfs | false | | Pseudo TTY | false | ## Actions After a container is created there is a standard set of actions that can be done to the container. These actions are part of the public API for a container. | Action | Description | | -------------- | ------------------------------------------------------------------ | | Get processes | Return all the pids for processes running inside a container | | Get Stats | Return resource statistics for the container as a whole | | Wait | Wait waits on the container's init process ( pid 1 ) | | Wait Process | Wait on any of the container's processes returning the exit status | | Destroy | Kill the container's init process and remove any filesystem state | | Signal | Send a signal to the container's init process | | Signal Process | Send a signal to any of the container's processes | | Pause | Pause all processes inside the container | | Resume | Resume all processes inside the container if paused | | Exec | Execute a new process inside of the container ( requires setns ) | ### Execute a new process inside of a running container. User can execute a new process inside of a running container. Any binaries to be executed must be accessible within the container's rootfs. The started process will run inside the container's rootfs. Any changes made by the process to the container's filesystem will persist after the process finished executing. The started process will join all the container's existing namespaces. 
When the container is paused, the process will also be paused and will resume when the container is unpaused. The started process will only run when the container's primary process (PID 1) is running, and will not be restarted when the container is restarted. #### Planned additions The started process will have its own cgroups nested inside the container's cgroups. This is used for process tracking and optionally resource allocation handling for the new process. Freezer cgroup is required, the rest of the cgroups are optional. The process executor must place its pid inside the correct cgroups before starting the process. This is done so that no child processes or threads can escape the cgroups. When the process is stopped, the process executor will try (in a best-effort way) to stop all its children and remove the sub-cgroups. libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/README.md0000644000175000017500000001356112524212370021443 0ustar tianontianon## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.com/buildStatus/icon?job=Libcontainer Master)](https://jenkins.dockerproject.com/job/Libcontainer%20Master/) Libcontainer provides a native Go implementation for creating containers with namespaces, cgroups, capabilities, and filesystem access controls. It allows you to manage the lifecycle of the container performing additional operations after the container is created. #### Container A container is a self contained execution environment that shares the kernel of the host system and which is (optionally) isolated from other containers in the system. #### Using libcontainer To create a container you first have to initialize an instance of a factory that will handle the creation and initialization for a container. Because containers are spawned in a two step process you will need to provide arguments to a binary that will be executed as the init process for the container. 
To use the current binary that is spawning the containers and acting as the parent you can use `os.Args[0]` and we have a command called `init` setup. ```go root, err := libcontainer.New("/var/lib/container", libcontainer.InitArgs(os.Args[0], "init")) if err != nil { log.Fatal(err) } ``` Once you have an instance of the factory created we can create a configuration struct describing how the container is to be created. A sample would look similar to this: ```go config := &configs.Config{ Rootfs: rootfs, Capabilities: []string{ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE", }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: configs.NEWNS}, {Type: configs.NEWUTS}, {Type: configs.NEWIPC}, {Type: configs.NEWPID}, {Type: configs.NEWNET}, }), Cgroups: &configs.Cgroup{ Name: "test-container", Parent: "system", AllowAllDevices: false, AllowedDevices: configs.DefaultAllowedDevices, }, Devices: configs.DefaultAutoCreatedDevices, Hostname: "testing", Networks: []*configs.Network{ { Type: "loopback", Address: "127.0.0.1/0", Gateway: "localhost", }, }, Rlimits: []configs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, Hard: uint64(1024), Soft: uint64(1024), }, }, } ``` Once you have the configuration populated you can create a container: ```go container, err := root.Create("container-id", config) ``` To spawn bash as the initial process inside the container and have the processes pid returned in order to wait, signal, or kill the process: ```go process := &libcontainer.Process{ Args: []string{"/bin/bash"}, Env: []string{"PATH=/bin"}, User: "daemon", Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, } err := container.Start(process) if err != nil { log.Fatal(err) } // wait for the process to finish. status, err := process.Wait() if err != nil { log.Fatal(err) } // destroy the container. 
container.Destroy() ``` Additional ways to interact with a running container are: ```go // return all the pids for all processes running inside the container. processes, err := container.Processes() // get detailed cpu, memory, io, and network statistics for the container and // its processes. stats, err := container.Stats() // pause all processes inside the container. container.Pause() // resume all paused processes. container.Resume() ``` #### nsinit `nsinit` is a cli application which demonstrates the use of libcontainer. It is able to spawn new containers or join existing containers. A root filesystem must be provided for use along with a container configuration file. To build `nsinit`, run `make binary`. It will save the binary into `bundles/nsinit`. To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into the directory with your specified configuration. Environment, networking, and different capabilities for the container are specified in this file. The configuration is used for each process executed inside the container. See the `sample_configs` folder for examples of what the container configuration should look like. To execute `/bin/bash` in the current directory as a container just run the following **as root**: ```bash nsinit exec --tty /bin/bash ``` If you wish to spawn another process inside the container while your current bash session is running, run the same command again to get another bash shell (or change the command). If the original process (PID 1) dies, all other processes spawned inside the container will be killed and the namespace will be removed. You can identify if a process is running in a container by looking to see if `state.json` is in the root of the directory. You may also specify an alternate root place where the `container.json` file is read and where the `state.json` file will be saved. #### Future See the [roadmap](ROADMAP.md). ## Copyright and license Code and documentation copyright 2014 Docker, inc.
Code released under the Apache 2.0 license. Docs released under Creative commons. ## Hacking on libcontainer First of all, please familiarise yourself with the [libcontainer Principles](PRINCIPLES.md). If you're a *contributor* or aspiring contributor, you should read the [Contributors' Guide](CONTRIBUTING.md). If you're a *maintainer* or aspiring maintainer, you should read the [Maintainers' Guide](MAINTAINERS_GUIDE.md) and "How can I become a maintainer?" in the Contributors' Guide. libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/CONTRIBUTING.md0000644000175000017500000002315112524212370022411 0ustar tianontianon# The libcontainer Contributors' Guide Want to hack on libcontainer? Awesome! Here are instructions to get you started. They are probably not perfect, please let us know if anything feels wrong or incomplete. ## Reporting Issues When reporting [issues](https://github.com/docker/libcontainer/issues) on GitHub please include your host OS (Ubuntu 12.04, Fedora 19, etc), the output of `uname -a`. Please include the steps required to reproduce the problem if possible and applicable. This information will help us review and fix your issue faster. ## Development Environment ### Requirements For best results, use a Linux development environment. The following packages are required to compile libcontainer natively. - Golang 1.3 - GCC - git - cgutils You can develop on OSX, but you are limited to Dockerfile-based builds only. ### Building libcontainer from Dockerfile make all This is the easiest way of building libcontainer. As this build is done using Docker, you can even run this from [OSX](https://github.com/boot2docker/boot2docker) ### Testing changes with "nsinit" make sh This will create a container that runs `nsinit exec sh` on a busybox rootfs with the configuration from ['minimal.json'](https://github.com/docker/libcontainer/blob/master/sample_configs/minimal.json). Like the previous command, you can run this on OSX too!
### Building libcontainer directly > Note: You should add the `vendor` directory to your GOPATH to use the vendored libraries ./update-vendor.sh go get -d ./... make direct-build # Run the tests make direct-test-short | egrep --color 'FAIL|$' # Run all the tests make direct-test | egrep --color 'FAIL|$' ### Testing Changes with "nsinit" directly To test a change: # Install nsinit make direct-install # Optional, add a docker0 bridge ip link add docker0 type bridge ifconfig docker0 172.17.0.1/16 up mkdir testfs curl -sSL https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.02/rootfs.tar | tar -xC testfs cd testfs cp <your-sample-config.json> container.json nsinit exec sh ## Contribution Guidelines ### Pull requests are always welcome We are always thrilled to receive pull requests, and do our best to process them as fast as possible. Not sure if that typo is worth a pull request? Do it! We will appreciate it. If your pull request is not accepted on the first try, don't be discouraged! If there's a problem with the implementation, hopefully you received feedback on what to improve. We're trying very hard to keep libcontainer lean and focused. We don't want it to do everything for everybody. This means that we might decide against incorporating a new feature. However, there might be a way to implement that feature *on top of* libcontainer. ### Discuss your design on the mailing list We recommend discussing your plans [on the mailing list](https://groups.google.com/forum/?fromgroups#!forum/libcontainer) before starting to code - especially for more ambitious contributions. This gives other contributors a chance to point you in the right direction, give feedback on your design, and maybe point out if someone else is working on the same thing. ### Create issues... Any significant improvement should be documented as [a GitHub issue](https://github.com/docker/libcontainer/issues) before anybody starts working on it. ### ...but check for existing issues first!
Please take a moment to check that an issue doesn't already exist documenting your bug report or improvement proposal. If it does, it never hurts to add a quick "+1" or "I have this problem too". This will help prioritize the most common problems and requests. ### Conventions Fork the repo and make changes on your fork in a feature branch: - If it's a bugfix branch, name it XXX-something where XXX is the number of the issue - If it's a feature branch, create an enhancement issue to announce your intentions, and name it XXX-something where XXX is the number of the issue. Submit unit tests for your changes. Go has a great test framework built in; use it! Take a look at existing tests for inspiration. Run the full test suite on your branch before submitting a pull request. Update the documentation when creating or modifying features. Test your documentation changes for clarity, concision, and correctness, as well as a clean documentation build. See ``docs/README.md`` for more information on building the docs and how docs get released. Write clean code. Universally formatted code promotes ease of writing, reading, and maintenance. Always run `gofmt -s -w file.go` on each changed file before committing your changes. Most editors have plugins that do this automatically. Pull requests descriptions should be as clear as possible and include a reference to all the issues that they address. Pull requests must not contain commits from other users or branches. Commit messages must start with a capitalized and short summary (max. 50 chars) written in the imperative, followed by an optional, more detailed explanatory text which is separated from the summary by an empty line. Code review comments may be added to your pull request. Discuss, then make the suggested modifications and push additional commits to your feature branch. Be sure to post a comment after pushing. 
The new commits will show up in the pull request automatically, but the reviewers will not be notified unless you comment. Before the pull request is merged, make sure that you squash your commits into logical units of work using `git rebase -i` and `git push -f`. After every commit the test suite should be passing. Include documentation changes in the same commit so that a revert would remove all traces of the feature or fix. Commits that fix or close an issue should include a reference like `Closes #XXX` or `Fixes #XXX`, which will automatically close the issue when merged. ### Testing Make sure you include suitable tests, preferably unit tests, in your pull request and that all the tests pass. *Instructions for running tests to be added.* ### Merge approval libcontainer maintainers use LGTM (looks good to me) in comments on the code review to indicate acceptance. A change requires LGTMs from at least two maintainers. One of those must come from a maintainer of the component affected. For example, if a change affects `netlink/` and `security`, it needs at least one LGTM from a maintainer of each. Maintainers only need one LGTM as presumably they LGTM their own change. For more details see [MAINTAINERS.md](MAINTAINERS.md) ### Sign your work The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify the below (from [developercertificate.org](http://developercertificate.org/)): ``` Developer Certificate of Origin Version 1.1 Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 660 York Street, Suite 102, San Francisco, CA 94110 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1 By making a contribution to this project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. ``` then you just add a line to every git commit message: Docker-DCO-1.1-Signed-off-by: Joe Smith <joe.smith@email.com> (github: github_handle) using your real name (sorry, no pseudonyms or anonymous contributions.) One way to automate this is to customise your git ``commit.template`` by adding a ``prepare-commit-msg`` hook to your libcontainer checkout: ``` curl -o .git/hooks/prepare-commit-msg https://raw.githubusercontent.com/docker/docker/master/contrib/prepare-commit-msg.hook && chmod +x .git/hooks/prepare-commit-msg ``` * Note: the above script expects to find your GitHub user name in ``git config --get github.user`` #### Small patch exception There are several exceptions to the signing requirement. Currently these are: * Your patch fixes spelling or grammar errors. * Your patch is a single line change to documentation contained in the `docs` directory.
// IsEnabled reports whether AppArmor appears to be active for this process.
//
// It returns true only when all of the following hold:
//   - /sys/kernel/security/apparmor can be stat'ed,
//   - the "container" environment variable is empty or unset,
//   - /sys/module/apparmor/parameters/enabled is readable, longer than one
//     byte, and starts with 'Y'.
func IsEnabled() bool {
	// No securityfs entry: nothing further to check.
	if _, statErr := os.Stat("/sys/kernel/security/apparmor"); statErr != nil {
		return false
	}
	// A non-empty "container" variable turns the check off.
	if os.Getenv("container") != "" {
		return false
	}
	flag, readErr := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
	if readErr != nil {
		return false
	}
	return len(flag) > 1 && flag[0] == 'Y'
}
[]string InnerImports []string } const baseTemplate = ` {{range $value := .Imports}} {{$value}} {{end}} profile {{.Name}} flags=(attach_disconnected,mediate_deleted) { {{range $value := .InnerImports}} {{$value}} {{end}} network, capability, file, umount, deny @{PROC}/sys/fs/** wklx, deny @{PROC}/sysrq-trigger rwklx, deny @{PROC}/mem rwklx, deny @{PROC}/kmem rwklx, deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, deny @{PROC}/sys/kernel/*/** wklx, deny mount, deny /sys/[^f]*/** wklx, deny /sys/f[^s]*/** wklx, deny /sys/fs/[^c]*/** wklx, deny /sys/fs/c[^g]*/** wklx, deny /sys/fs/cg[^r]*/** wklx, deny /sys/firmware/efi/efivars/** rwklx, deny /sys/kernel/security/** rwklx, } ` func generateProfile(out io.Writer) error { compiled, err := template.New("apparmor_profile").Parse(baseTemplate) if err != nil { return err } data := &data{ Name: "docker-default", } if tunablesExists() { data.Imports = append(data.Imports, "#include ") } else { data.Imports = append(data.Imports, "@{PROC}=/proc/") } if abstractionsExists() { data.InnerImports = append(data.InnerImports, "#include ") } if err := compiled.Execute(out, data); err != nil { return err } return nil } // check if the tunables/global exist func tunablesExists() bool { _, err := os.Stat("/etc/apparmor.d/tunables/global") return err == nil } // check if abstractions/base exist func abstractionsExists() bool { _, err := os.Stat("/etc/apparmor.d/abstractions/base") return err == nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/apparmor/setup.go0000644000175000017500000000155112524212370023470 0ustar tianontianonpackage apparmor import ( "fmt" "os" "os/exec" "path" ) const ( DefaultProfilePath = "/etc/apparmor.d/docker" ) func InstallDefaultProfile() error { if !IsEnabled() { return nil } // Make sure /etc/apparmor.d exists if err := os.MkdirAll(path.Dir(DefaultProfilePath), 0755); err != nil { return err } f, err := os.OpenFile(DefaultProfilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { return err 
} if err := generateProfile(f); err != nil { f.Close() return err } f.Close() cmd := exec.Command("/sbin/apparmor_parser", "-r", "-W", "docker") // to use the parser directly we have to make sure we are in the correct // dir with the profile cmd.Dir = "/etc/apparmor.d" output, err := cmd.CombinedOutput() if err != nil { return fmt.Errorf("Error loading docker apparmor profile: %s (%s)", err, output) } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/stats.go0000644000175000017500000000061112524212370021641 0ustar tianontianonpackage libcontainer import "github.com/docker/libcontainer/cgroups" type Stats struct { Interfaces []*NetworkInterface CgroupStats *cgroups.Stats } type NetworkInterface struct { // Name is the name of the network interface. Name string RxBytes uint64 RxPackets uint64 RxErrors uint64 RxDropped uint64 TxBytes uint64 TxPackets uint64 TxErrors uint64 TxDropped uint64 } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/Dockerfile0000644000175000017500000000131712524212370022152 0ustar tianontianonFROM golang:1.4 RUN go get golang.org/x/tools/cmd/cover ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor RUN go get github.com/docker/docker/pkg/term # setup a playground for us to spawn containers in RUN mkdir /busybox && \ curl -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.11/rootfs.tar' | tar -xC /busybox RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/hack/dind -o /dind && \ chmod +x /dind COPY . /go/src/github.com/docker/libcontainer WORKDIR /go/src/github.com/docker/libcontainer RUN cp sample_configs/minimal.json /busybox/container.json RUN go get -d -v ./... 
// pid is the JSON document decoded from the pipe handed to the nsenter helper.
type pid struct {
	Pid int `json:"Pid"`
}

// TestNsenterAlivePid re-executes the test binary as "nsenter-exec" with
// _LIBCONTAINER_INITPID set to this live process. It expects a pid document
// on the pipe and a zero exit status.
func TestNsenterAlivePid(t *testing.T) {
	r, w, err := os.Pipe()
	if err != nil {
		t.Fatalf("failed to create pipe %v", err)
	}
	defer r.Close()

	cmd := &exec.Cmd{
		Path:       os.Args[0],
		Args:       []string{"nsenter-exec"},
		ExtraFiles: []*os.File{w}, // first extra file is fd 3 in the child
		Env:        []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid())},
	}
	if err := cmd.Start(); err != nil {
		w.Close()
		t.Fatalf("nsenter failed to start %v", err)
	}
	// Close our copy of the write end so Decode sees EOF instead of blocking
	// if the child exits without writing.
	w.Close()

	// Decode into a value, not a pointer: a "null" payload then cannot cause
	// a nil dereference below.
	var child pid
	if err := json.NewDecoder(r).Decode(&child); err != nil {
		t.Fatalf("%v", err)
	}

	if err := cmd.Wait(); err != nil {
		t.Fatalf("nsenter exits with a non-zero exit status: %v", err)
	}

	p, err := os.FindProcess(child.Pid)
	if err != nil {
		t.Fatalf("%v", err)
	}
	// Best-effort wait on the reported process; the result is deliberately
	// not part of the assertion.
	p.Wait()
}

// TestNsenterInvalidPid expects the helper to fail when
// _LIBCONTAINER_INITPID is -1.
func TestNsenterInvalidPid(t *testing.T) {
	cmd := &exec.Cmd{
		Path: os.Args[0],
		Args: []string{"nsenter-exec"},
		Env:  []string{"_LIBCONTAINER_INITPID=-1"},
	}
	if err := cmd.Run(); err == nil {
		t.Fatal("nsenter exits with a zero exit status")
	}
}

// TestNsenterDeadPid expects the helper to fail when _LIBCONTAINER_INITPID
// names a process that has already exited.
func TestNsenterDeadPid(t *testing.T) {
	deadCmd := exec.Command("true")
	if err := deadCmd.Run(); err != nil {
		t.Fatal(err)
	}

	cmd := &exec.Cmd{
		Path: os.Args[0],
		Args: []string{"nsenter-exec"},
		Env:  []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", deadCmd.Process.Pid)},
	}
	if err := cmd.Run(); err == nil {
		t.Fatal("nsenter exits with a zero exit status")
	}
}

// init lets the re-executed test binary stand in for the helper: when argv[0]
// starts with "nsenter-" (the tests pass "nsenter-exec"), the process exits
// successfully instead of running the test suite again.
func init() {
	if strings.HasPrefix(os.Args[0], "nsenter-") {
		os.Exit(0)
	}
}
/* All arguments should be above stack, because it grows down */
struct clone_arg {
	/*
	 * Reserve some space for clone() to locate arguments
	 * and retcode in this place
	 */
	char stack[4096] __attribute__ ((aligned(8)));
	/* Zero-length marker placed right after stack[]; passed to clone() as the child stack pointer. */
	char stack_ptr[0];
	/* Jump buffer the cloned child longjmp()s to (see child_func). */
	jmp_buf *env;
};

/* Print "nsenter: <message>: <strerror(errno)>" to stderr (%m expands to the errno text). */
#define pr_perror(fmt, ...) fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__)

/*
 * Entry point of the cloned child: jump back into nsexec() at its setjmp()
 * call, which then returns 1. Never returns normally.
 */
static int child_func(void *_arg)
{
	struct clone_arg *arg = (struct clone_arg *)_arg;
	longjmp(*arg->env, 1);
}

// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12)
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#define _GNU_SOURCE
#include "syscall.h"
#ifdef SYS_setns
int setns(int fd, int nstype)
{
	return syscall(SYS_setns, fd, nstype);
}
#endif
#endif

/*
 * Clone a child that starts in child_func() on the small stack embedded in a
 * local struct clone_arg. Returns the value of clone(): the child's pid, or a
 * negative value on failure. Declared noinline so the clone_arg lives in this
 * function's own frame.
 */
static int clone_parent(jmp_buf * env) __attribute__ ((noinline));
static int clone_parent(jmp_buf * env)
{
	struct clone_arg ca;
	int child;

	ca.env = env;
	child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);

	return child;
}

/*
 * nsexec joins the namespaces of the process named by _LIBCONTAINER_INITPID.
 *
 * - If _LIBCONTAINER_INITPID is unset, it returns immediately and does nothing.
 * - Otherwise it opens /proc/<pid>/ns and calls setns() on each entry listed
 *   in namespaces[], in array order, skipping entries that do not exist.
 * - It then clones a child. The child resumes at setjmp() with return value 1,
 *   calls setsid(), optionally makes _LIBCONTAINER_CONSOLE_PATH its
 *   controlling terminal and stdin/stdout/stderr, and returns to the caller.
 * - The original process writes the child's pid as a JSON line to fd 3 and
 *   exits with status 0.
 *
 * Any failure is reported with pr_perror() and ends the process with exit(1).
 */
void nsexec()
{
	char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
	const int num = sizeof(namespaces) / sizeof(char *);
	jmp_buf env;
	char buf[PATH_MAX], *val;
	int i, tfd, child, len, consolefd = -1;
	pid_t pid;
	char *console;

	val = getenv("_LIBCONTAINER_INITPID");
	if (val == NULL)
		return;

	/* Round-trip the value through atoi()/snprintf(): any input that is not a plain decimal integer is rejected. */
	pid = atoi(val);
	snprintf(buf, sizeof(buf), "%d", pid);
	if (strcmp(val, buf)) {
		pr_perror("Unable to parse _LIBCONTAINER_INITPID");
		exit(1);
	}

	/* The console is opened before any setns() call below. */
	console = getenv("_LIBCONTAINER_CONSOLE_PATH");
	if (console != NULL) {
		consolefd = open(console, O_RDWR);
		if (consolefd < 0) {
			pr_perror("Failed to open console %s", console);
			exit(1);
		}
	}

	/* Check that the specified process exists */
	snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid);
	tfd = open(buf, O_DIRECTORY | O_RDONLY);
	if (tfd == -1) {
		pr_perror("Failed to open \"%s\"", buf);
		exit(1);
	}

	for (i = 0; i < num; i++) {
		struct stat st;
		int fd;

		/* Symlinks on all namespaces exist for dead processes, but they can't be opened */
		if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) {
			// Ignore nonexistent namespaces.
			if (errno == ENOENT)
				continue;
		}

		/* Other fstatat() errors fall through; openat() below then reports the failure. */
		fd = openat(tfd, namespaces[i], O_RDONLY);
		if (fd == -1) {
			pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
			exit(1);
		}
		// Set the namespace.
		if (setns(fd, 0) == -1) {
			pr_perror("Failed to setns for %s", namespaces[i]);
			exit(1);
		}
		close(fd);
	}

	/* Returns 0 on the direct call; returns 1 in the cloned child after child_func()'s longjmp(). */
	if (setjmp(env) == 1) {
		if (setsid() == -1) {
			pr_perror("setsid failed");
			exit(1);
		}
		if (consolefd != -1) {
			if (ioctl(consolefd, TIOCSCTTY, 0) == -1) {
				pr_perror("ioctl TIOCSCTTY failed");
				exit(1);
			}
			if (dup2(consolefd, STDIN_FILENO) != STDIN_FILENO) {
				pr_perror("Failed to dup 0");
				exit(1);
			}
			if (dup2(consolefd, STDOUT_FILENO) != STDOUT_FILENO) {
				pr_perror("Failed to dup 1");
				exit(1);
			}
			if (dup2(consolefd, STDERR_FILENO) != STDERR_FILENO) {
				pr_perror("Failed to dup 2");
				exit(1);
			}
		}
		// Finish executing, let the Go runtime take over.
		return;
	}

	child = clone_parent(&env);
	if (child < 0) {
		pr_perror("Unable to fork");
		exit(1);
	}

	/* Report the child's pid on fd 3; on a short or failed write the child is killed. */
	len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child);

	if (write(3, buf, len) != len) {
		pr_perror("Unable to send a child pid");
		kill(child, SIGKILL);
		exit(1);
	}

	exit(0);
}
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134 C.init() } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsenter/nsenter.go0000644000175000017500000000023712524212370023643 0ustar tianontianon// +build linux,!gccgo package nsenter /* #cgo CFLAGS: -Wall extern void nsexec(); void __attribute__((constructor)) init(void) { nsexec(); } */ import "C" libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/rootfs_linux_test.go0000644000175000017500000000142612524212370024302 0ustar tianontianon// +build linux package libcontainer import "testing" func TestCheckMountDestOnProc(t *testing.T) { dest := "/rootfs/proc/" err := checkMountDestination("/rootfs", dest) if err == nil { t.Fatal("destination inside proc should return an error") } } func TestCheckMountDestInSys(t *testing.T) { dest := "/rootfs//sys/fs/cgroup" err := checkMountDestination("/rootfs", dest) if err != nil { t.Fatal("destination inside /sys should not return an error") } } func TestCheckMountDestFalsePositive(t *testing.T) { dest := "/rootfs/sysfiles/fs/cgroup" err := checkMountDestination("/rootfs", dest) if err != nil { t.Fatal(err) } } func TestCheckMountRoot(t *testing.T) { dest := "/rootfs" err := checkMountDestination("/rootfs", dest) if err == nil { t.Fatal(err) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/MAINTAINERS0000644000175000017500000000053512524212370021656 0ustar tianontianonMichael Crosby (@crosbymichael) Rohit Jnagal (@rjnagal) Victor Marmol (@vmarmol) Mrunal Patel (@mrunalp) Alexandr Morozov (@LK4D4) Daniel, Dao Quang Minh (@dqminh) update-vendor.sh: Tianon Gravi (@tianon) libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/0000755000175000017500000000000012524212370021462 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/pause.go0000644000175000017500000000153612524212370023133 0ustar tianontianonpackage main import ( "log" "github.com/codegangsta/cli" ) var pauseCommand = cli.Command{ Name: "pause", Usage: "pause the 
container's processes", Flags: []cli.Flag{ cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, }, Action: func(context *cli.Context) { container, err := getContainer(context) if err != nil { log.Fatal(err) } if err = container.Pause(); err != nil { log.Fatal(err) } }, } var unpauseCommand = cli.Command{ Name: "unpause", Usage: "unpause the container's processes", Flags: []cli.Flag{ cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, }, Action: func(context *cli.Context) { container, err := getContainer(context) if err != nil { log.Fatal(err) } if err = container.Resume(); err != nil { log.Fatal(err) } }, } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/main.go0000644000175000017500000000177112524212370022743 0ustar tianontianonpackage main import ( "os" log "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" ) func main() { app := cli.NewApp() app.Name = "nsinit" app.Version = "2" app.Author = "libcontainer maintainers" app.Flags = []cli.Flag{ cli.StringFlag{Name: "root", Value: "/var/run/nsinit", Usage: "root directory for containers"}, cli.StringFlag{Name: "log-file", Value: "", Usage: "set the log file to output logs to"}, cli.BoolFlag{Name: "debug", Usage: "enable debug output in the logs"}, } app.Commands = []cli.Command{ configCommand, execCommand, initCommand, oomCommand, pauseCommand, statsCommand, unpauseCommand, stateCommand, } app.Before = func(context *cli.Context) error { if context.GlobalBool("debug") { log.SetLevel(log.DebugLevel) } if path := context.GlobalString("log-file"); path != "" { f, err := os.Create(path) if err != nil { return err } log.SetOutput(f) } return nil } if err := app.Run(os.Args); err != nil { log.Fatal(err) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/README.md0000644000175000017500000000420312524212370022740 0ustar tianontianon## nsinit `nsinit` is a cli application which demonstrates the use of libcontainer. 
It is able to spawn new containers or join existing containers.

### How to build?

First add `libcontainer/vendor` to your GOPATH. This is needed because libcontainer vendors all of its dependencies so that it can be built predictably.

```
export GOPATH=$GOPATH:/your/path/to/libcontainer/vendor
```

Then change into the nsinit folder and fetch the imported packages with `go get`. Use the `make` command to build the nsinit binary.

```
cd libcontainer/nsinit
go get
make
```

The nsinit binary is now built, but a root filesystem must be provided for use along with a container configuration file. Choose a suitable place to run your container. For example, we use `/busybox`.

```
mkdir /busybox
curl -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.11/rootfs.tar' | tar -xC /busybox
```

Then you may need to write a configuration file named `container.json` in the `/busybox` folder. Environment, networking, and different capabilities for the container are specified in this file. The configuration is used for each process executed inside the container.

See the `sample_configs` folder for examples of what the container configuration should look like.

```
cp libcontainer/sample_configs/minimal.json /busybox/container.json
cd /busybox
```

You can customize `container.json` to suit your needs. After that, nsinit is ready to work.

To execute `/bin/bash` in the current directory as a container, just run the following **as root**:

```bash
nsinit exec --tty --config container.json /bin/bash
```

If you wish to spawn another process inside the container while your current bash session is running, run the same command again to get another bash shell (or change the command). If the original process (PID 1) dies, all other processes spawned inside the container will be killed and the namespace will be removed.

You can identify if a process is running in a container by looking to see if `state.json` is in the root of the directory.
// initCommand is the "init" subcommand. It hands control to libcontainer's
// initialization routine and is not expected to return from it.
var initCommand = cli.Command{
	Name:  "init",
	Usage: "runs the init process inside the namespace",
	Action: func(context *cli.Context) {
		// Debug logging is forced on here regardless of the global --debug flag.
		log.SetLevel(log.DebugLevel)
		// Restrict the runtime to a single processor and pin this goroutine to
		// its OS thread before initialization starts.
		// NOTE(review): presumably required because namespace/credential
		// changes are per-thread; confirm against libcontainer's init code.
		runtime.GOMAXPROCS(1)
		runtime.LockOSThread()
		// The factory is created with an empty root directory.
		factory, err := libcontainer.New("")
		if err != nil {
			fatal(err)
		}
		// NOTE(review): 3 looks like the file descriptor of the pipe shared
		// with the parent process; confirm against Factory.StartInitialization.
		if err := factory.StartInitialization(3); err != nil {
			fatal(err)
		}
		// Reached only if StartInitialization returns without an error, which
		// the message below treats as impossible.
		panic("This line should never been executed")
	},
}
"net", Value: "", Usage: "network namespace"}, cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"}, cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"}, cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"}, cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"}, cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"}, cli.StringFlag{Name: "veth-address", Usage: "veth ip address"}, cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"}, cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"}, } var configCommand = cli.Command{ Name: "config", Usage: "generate a standard configuration file for a container", Flags: append([]cli.Flag{ cli.StringFlag{Name: "file,f", Value: "stdout", Usage: "write the configuration to the specified file"}, }, createFlags...), Action: func(context *cli.Context) { template := getTemplate() modify(template, context) data, err := json.MarshalIndent(template, "", "\t") if err != nil { fatal(err) } var f *os.File filePath := context.String("file") switch filePath { case "stdout", "": f = os.Stdout default: if f, err = os.Create(filePath); err != nil { fatal(err) } defer f.Close() } if _, err := io.Copy(f, bytes.NewBuffer(data)); err != nil { fatal(err) } }, } func modify(config *configs.Config, context *cli.Context) { config.ParentDeathSignal = context.Int("parent-death-signal") config.Readonlyfs = context.Bool("read-only") config.Cgroups.CpusetCpus = context.String("cpuset-cpus") config.Cgroups.CpusetMems = context.String("cpuset-mems") config.Cgroups.CpuShares = int64(context.Int("cpushares")) config.Cgroups.Memory = int64(context.Int("memory-limit")) config.Cgroups.MemorySwap = int64(context.Int("memory-swap")) config.AppArmorProfile = context.String("apparmor-profile") config.ProcessLabel = context.String("process-label") config.MountLabel = context.String("mount-label") rootfs := context.String("rootfs") if rootfs != "" { config.Rootfs = rootfs } userns_uid := 
context.Int("userns-root-uid") if userns_uid != 0 { config.Namespaces.Add(configs.NEWUSER, "") config.UidMappings = []configs.IDMap{ {ContainerID: 0, HostID: userns_uid, Size: 1}, {ContainerID: 1, HostID: 1, Size: userns_uid - 1}, {ContainerID: userns_uid + 1, HostID: userns_uid + 1, Size: math.MaxInt32 - userns_uid}, } config.GidMappings = []configs.IDMap{ {ContainerID: 0, HostID: userns_uid, Size: 1}, {ContainerID: 1, HostID: 1, Size: userns_uid - 1}, {ContainerID: userns_uid + 1, HostID: userns_uid + 1, Size: math.MaxInt32 - userns_uid}, } for _, node := range config.Devices { node.Uid = uint32(userns_uid) node.Gid = uint32(userns_uid) } } for _, rawBind := range context.StringSlice("bind") { mount := &configs.Mount{ Device: "bind", Flags: syscall.MS_BIND | syscall.MS_REC, } parts := strings.SplitN(rawBind, ":", 3) switch len(parts) { default: logrus.Fatalf("invalid bind mount %s", rawBind) case 2: mount.Source, mount.Destination = parts[0], parts[1] case 3: mount.Source, mount.Destination = parts[0], parts[1] switch parts[2] { case "ro": mount.Flags |= syscall.MS_RDONLY case "rw": default: logrus.Fatalf("invalid bind mount mode %s", parts[2]) } } config.Mounts = append(config.Mounts, mount) } for _, tmpfs := range context.StringSlice("tmpfs") { config.Mounts = append(config.Mounts, &configs.Mount{ Device: "tmpfs", Destination: tmpfs, Flags: syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV, }) } for flag, value := range map[string]configs.NamespaceType{ "net": configs.NEWNET, "mnt": configs.NEWNS, "pid": configs.NEWPID, "ipc": configs.NEWIPC, "uts": configs.NEWUTS, } { switch v := context.String(flag); v { case "host": config.Namespaces.Remove(value) case "", "private": if !config.Namespaces.Contains(value) { config.Namespaces.Add(value, "") } if flag == "net" { config.Networks = []*configs.Network{ { Type: "loopback", Address: "127.0.0.1/0", Gateway: "localhost", }, } } if flag == "uts" { config.Hostname = context.String("hostname") } default: 
config.Namespaces.Remove(value) config.Namespaces.Add(value, v) } } if bridge := context.String("veth-bridge"); bridge != "" { hostName, err := utils.GenerateRandomName("veth", 7) if err != nil { logrus.Fatal(err) } network := &configs.Network{ Type: "veth", Name: "eth0", Bridge: bridge, Address: context.String("veth-address"), Gateway: context.String("veth-gateway"), Mtu: context.Int("veth-mtu"), HostInterfaceName: hostName, } config.Networks = append(config.Networks, network) } } func getTemplate() *configs.Config { cwd, err := os.Getwd() if err != nil { panic(err) } return &configs.Config{ Rootfs: cwd, ParentDeathSignal: int(syscall.SIGKILL), Capabilities: []string{ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE", }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: configs.NEWNS}, {Type: configs.NEWUTS}, {Type: configs.NEWIPC}, {Type: configs.NEWPID}, {Type: configs.NEWNET}, }), Cgroups: &configs.Cgroup{ Name: filepath.Base(cwd), Parent: "nsinit", AllowAllDevices: false, AllowedDevices: configs.DefaultAllowedDevices, }, Devices: configs.DefaultAutoCreatedDevices, MaskPaths: []string{ "/proc/kcore", }, ReadonlyPaths: []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", }, Mounts: []*configs.Mount{ { Source: "proc", Destination: "/proc", Device: "proc", Flags: defaultMountFlags, }, { Source: "tmpfs", Destination: "/dev", Device: "tmpfs", Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, Data: "mode=755", }, { Source: "devpts", Destination: "/dev/pts", Device: "devpts", Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", }, { Device: "tmpfs", Source: "shm", Destination: "/dev/shm", Data: "mode=1777,size=65536k", Flags: defaultMountFlags, }, { Source: "mqueue", Destination: "/dev/mqueue", Device: "mqueue", Flags: defaultMountFlags, }, { Source: "sysfs", Destination: "/sys", 
Device: "sysfs", Flags: defaultMountFlags | syscall.MS_RDONLY, }, }, Rlimits: []configs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, Hard: 1024, Soft: 1024, }, }, } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/tty.go0000644000175000017500000000222712524212370022634 0ustar tianontianonpackage main import ( "io" "os" "github.com/codegangsta/cli" "github.com/docker/docker/pkg/term" "github.com/docker/libcontainer" ) func newTty(context *cli.Context, p *libcontainer.Process, rootuid int) (*tty, error) { if context.Bool("tty") { console, err := p.NewConsole(rootuid) if err != nil { return nil, err } return &tty{ console: console, }, nil } return &tty{}, nil } type tty struct { console libcontainer.Console state *term.State } func (t *tty) Close() error { if t.console != nil { t.console.Close() } if t.state != nil { term.RestoreTerminal(os.Stdin.Fd(), t.state) } return nil } func (t *tty) attach(process *libcontainer.Process) error { if t.console != nil { go io.Copy(t.console, os.Stdin) go io.Copy(os.Stdout, t.console) state, err := term.SetRawTerminal(os.Stdin.Fd()) if err != nil { return err } t.state = state process.Stderr = nil process.Stdout = nil process.Stdin = nil } return nil } func (t *tty) resize() error { if t.console == nil { return nil } ws, err := term.GetWinsize(os.Stdin.Fd()) if err != nil { return err } return term.SetWinsize(t.console.Fd(), ws) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/exec.go0000644000175000017500000000517512524212370022745 0ustar tianontianonpackage main import ( "os" "os/exec" "os/signal" "syscall" "github.com/codegangsta/cli" "github.com/docker/libcontainer" "github.com/docker/libcontainer/utils" ) var standardEnvironment = &cli.StringSlice{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "HOSTNAME=nsinit", "TERM=xterm", } var execCommand = cli.Command{ Name: "exec", Usage: "execute a new command inside a container", Action: execAction, Flags: append([]cli.Flag{ 
cli.BoolFlag{Name: "tty,t", Usage: "allocate a TTY to the container"}, cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, cli.StringFlag{Name: "config", Value: "", Usage: "path to the configuration file"}, cli.StringFlag{Name: "user,u", Value: "root", Usage: "set the user, uid, and/or gid for the process"}, cli.StringFlag{Name: "cwd", Value: "", Usage: "set the current working dir"}, cli.StringSliceFlag{Name: "env", Value: standardEnvironment, Usage: "set environment variables for the process"}, }, createFlags...), } func execAction(context *cli.Context) { factory, err := loadFactory(context) if err != nil { fatal(err) } config, err := loadConfig(context) if err != nil { fatal(err) } created := false container, err := factory.Load(context.String("id")) if err != nil { created = true if container, err = factory.Create(context.String("id"), config); err != nil { fatal(err) } } process := &libcontainer.Process{ Args: context.Args(), Env: context.StringSlice("env"), User: context.String("user"), Cwd: context.String("cwd"), Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, } rootuid, err := config.HostUID() if err != nil { fatal(err) } tty, err := newTty(context, process, rootuid) if err != nil { fatal(err) } if err := tty.attach(process); err != nil { fatal(err) } go handleSignals(process, tty) err = container.Start(process) if err != nil { tty.Close() if created { container.Destroy() } fatal(err) } status, err := process.Wait() if err != nil { exitError, ok := err.(*exec.ExitError) if ok { status = exitError.ProcessState } else { tty.Close() if created { container.Destroy() } fatal(err) } } if created { if err := container.Destroy(); err != nil { tty.Close() fatal(err) } } tty.Close() os.Exit(utils.ExitStatus(status.Sys().(syscall.WaitStatus))) } func handleSignals(container *libcontainer.Process, tty *tty) { sigc := make(chan os.Signal, 10) signal.Notify(sigc) tty.resize() for sig := range sigc { switch sig { case 
syscall.SIGWINCH: tty.resize() default: container.Signal(sig) } } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/Makefile0000644000175000017500000000003312524212370023116 0ustar tianontianonall: go build -o nsinit . libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/state.go0000644000175000017500000000111712524212370023131 0ustar tianontianonpackage main import ( "encoding/json" "fmt" "github.com/codegangsta/cli" ) var stateCommand = cli.Command{ Name: "state", Usage: "get the container's current state", Flags: []cli.Flag{ cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, }, Action: func(context *cli.Context) { container, err := getContainer(context) if err != nil { fatal(err) } state, err := container.State() if err != nil { fatal(err) } data, err := json.MarshalIndent(state, "", "\t") if err != nil { fatal(err) } fmt.Printf("%s", data) }, } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/nsinit/utils.go0000644000175000017500000000233012524212370023147 0ustar tianontianonpackage main import ( "encoding/json" "fmt" "os" "github.com/codegangsta/cli" "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" ) func loadConfig(context *cli.Context) (*configs.Config, error) { if path := context.String("config"); path != "" { f, err := os.Open(path) if err != nil { return nil, err } defer f.Close() var config *configs.Config if err := json.NewDecoder(f).Decode(&config); err != nil { return nil, err } return config, nil } config := getTemplate() modify(config, context) return config, nil } func loadFactory(context *cli.Context) (libcontainer.Factory, error) { return libcontainer.New(context.GlobalString("root"), libcontainer.Cgroupfs) } func getContainer(context *cli.Context) (libcontainer.Container, error) { factory, err := loadFactory(context) if err != nil { return nil, err } container, err := factory.Load(context.String("id")) if err != nil { return nil, err } return container, nil } 
func fatal(err error) { if lerr, ok := err.(libcontainer.Error); ok { lerr.Detail(os.Stderr) os.Exit(1) } fmt.Fprintln(os.Stderr, err) os.Exit(1) } func fatalf(t string, v ...interface{}) { fmt.Fprintf(os.Stderr, t, v...) os.Exit(1) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/LICENSE0000644000175000017500000002500612524212370021166 0ustar tianontianon Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2014 Docker, Inc. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/container_linux_test.go0000644000175000017500000001040712524212370024747 0ustar tianontianon// +build linux package libcontainer import ( "fmt" "os" "testing" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type mockCgroupManager struct { pids []int stats *cgroups.Stats paths map[string]string } func (m *mockCgroupManager) GetPids() ([]int, error) { return m.pids, nil } func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) { return m.stats, nil } func (m *mockCgroupManager) Apply(pid int) error { return nil } func (m *mockCgroupManager) Set(container *configs.Config) error { return nil } func (m *mockCgroupManager) Destroy() error { return nil } func (m *mockCgroupManager) GetPaths() map[string]string { return m.paths } func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { return nil } type mockProcess struct { _pid int started string } func (m *mockProcess) terminate() error { return nil } func (m *mockProcess) pid() int { return m._pid } func (m *mockProcess) startTime() (string, error) { return m.started, nil } func (m *mockProcess) start() error { return nil } func (m *mockProcess) wait() (*os.ProcessState, error) { return nil, nil } func (m *mockProcess) signal(_ os.Signal) error { return nil } func TestGetContainerPids(t *testing.T) { container := &linuxContainer{ id: "myid", config: &configs.Config{}, cgroupManager: 
&mockCgroupManager{pids: []int{1, 2, 3}}, } pids, err := container.Processes() if err != nil { t.Fatal(err) } for i, expected := range []int{1, 2, 3} { if pids[i] != expected { t.Fatalf("expected pid %d but received %d", expected, pids[i]) } } } func TestGetContainerStats(t *testing.T) { container := &linuxContainer{ id: "myid", config: &configs.Config{}, cgroupManager: &mockCgroupManager{ pids: []int{1, 2, 3}, stats: &cgroups.Stats{ MemoryStats: cgroups.MemoryStats{ Usage: 1024, }, }, }, } stats, err := container.Stats() if err != nil { t.Fatal(err) } if stats.CgroupStats == nil { t.Fatal("cgroup stats are nil") } if stats.CgroupStats.MemoryStats.Usage != 1024 { t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage) } } func TestGetContainerState(t *testing.T) { var ( pid = os.Getpid() expectedMemoryPath = "/sys/fs/cgroup/memory/myid" expectedNetworkPath = "/networks/fd" ) container := &linuxContainer{ id: "myid", config: &configs.Config{ Namespaces: []configs.Namespace{ {Type: configs.NEWPID}, {Type: configs.NEWNS}, {Type: configs.NEWNET, Path: expectedNetworkPath}, {Type: configs.NEWUTS}, // emulate host for IPC //{Type: configs.NEWIPC}, }, }, initProcess: &mockProcess{ _pid: pid, started: "010", }, cgroupManager: &mockCgroupManager{ pids: []int{1, 2, 3}, stats: &cgroups.Stats{ MemoryStats: cgroups.MemoryStats{ Usage: 1024, }, }, paths: map[string]string{ "memory": expectedMemoryPath, }, }, } state, err := container.State() if err != nil { t.Fatal(err) } if state.InitProcessPid != pid { t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid) } if state.InitProcessStartTime != "010" { t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime) } paths := state.CgroupPaths if paths == nil { t.Fatal("cgroup paths should not be nil") } if memPath := paths["memory"]; memPath != expectedMemoryPath { t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath) } for _, 
ns := range container.config.Namespaces { path := state.NamespacePaths[ns.Type] if path == "" { t.Fatalf("expected non nil namespace path for %s", ns.Type) } if ns.Type == configs.NEWNET { if path != expectedNetworkPath { t.Fatalf("expected path %q but received %q", expectedNetworkPath, path) } } else { file := "" switch ns.Type { case configs.NEWNET: file = "net" case configs.NEWNS: file = "mnt" case configs.NEWPID: file = "pid" case configs.NEWIPC: file = "ipc" case configs.NEWUSER: file = "user" case configs.NEWUTS: file = "uts" } expected := fmt.Sprintf("/proc/%d/ns/%s", pid, file) if expected != path { t.Fatalf("expected path %q but received %q", expected, path) } } } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/standard_init_linux.go0000644000175000017500000000503112524212370024546 0ustar tianontianon// +build linux package libcontainer import ( "os" "syscall" "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/system" ) type linuxStandardInit struct { parentPid int config *initConfig } func (l *linuxStandardInit) Init() error { // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { return err } var console *linuxConsole if l.config.Console != "" { console = newConsoleFromPath(l.config.Console) if err := console.dupStdio(); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return err } if console != nil { if err := system.Setctty(); err != nil { return err } } if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { return err } if err := setupRlimits(l.config.Config); err != nil { return err } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := setupRootfs(l.config.Config, console); 
err != nil { return err } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { return err } } for _, path := range l.config.Config.MaskPaths { if err := maskFile(path); err != nil { return err } } pdeath, err := system.GetParentDeathSignal() if err != nil { return err } if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { return err } // compare the parent from the inital start of the init process and make sure that it did not change. // if the parent changes that means it died and we were reparened to something else so we should // just kill ourself and not cause problems for someone else. if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/process.go0000644000175000017500000000447012524212370022170 0ustar tianontianonpackage libcontainer import ( "fmt" "io" "math" "os" ) type processOperations interface { wait() (*os.ProcessState, error) signal(sig os.Signal) error pid() int } // Process specifies the configuration and IO for a process inside // a container. type Process struct { // The command to be run followed by any arguments. Args []string // Env specifies the environment variables for the process. Env []string // User will set the uid and gid of the executing process running inside the container // local to the contaienr's user and group configuration. 
User string // Cwd will change the processes current working directory inside the container's rootfs. Cwd string // Stdin is a pointer to a reader which provides the standard input stream. Stdin io.Reader // Stdout is a pointer to a writer which receives the standard output stream. Stdout io.Writer // Stderr is a pointer to a writer which receives the standard error stream. Stderr io.Writer // consolePath is the path to the console allocated to the container. consolePath string // Capabilities specify the capabilities to keep when executing the process inside the container // All capbilities not specified will be dropped from the processes capability mask Capabilities []string ops processOperations } // Wait waits for the process to exit. // Wait releases any resources associated with the Process func (p Process) Wait() (*os.ProcessState, error) { if p.ops == nil { return nil, newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted) } return p.ops.wait() } // Pid returns the process ID func (p Process) Pid() (int, error) { // math.MinInt32 is returned here, because it's invalid value // for the kill() system call. if p.ops == nil { return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted) } return p.ops.pid(), nil } // Signal sends a signal to the Process. 
func (p Process) Signal(sig os.Signal) error { if p.ops == nil { return newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted) } return p.ops.signal(sig) } // NewConsole creates new console for process and returns it func (p *Process) NewConsole(rootuid int) (Console, error) { console, err := newConsole(rootuid, rootuid) if err != nil { return nil, err } p.consolePath = console.Path() return console, nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/0000755000175000017500000000000012524212370021502 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/syscall_linux_arm.go0000644000175000017500000000075012524212370025563 0ustar tianontianon// +build linux,arm package system import ( "syscall" ) // Setuid sets the uid of the calling thread to the specified uid. func Setuid(uid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0) if e1 != 0 { err = e1 } return } // Setgid sets the gid of the calling thread to the specified gid. 
func Setgid(gid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0) if e1 != 0 { err = e1 } return } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/sysconfig.go0000644000175000017500000000021012524212370024026 0ustar tianontianon// +build cgo package system /* #include */ import "C" func GetClockTicks() int { return int(C.sysconf(C._SC_CLK_TCK)) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/linux.go0000644000175000017500000000267012524212370023175 0ustar tianontianon// +build linux package system import ( "os/exec" "syscall" "unsafe" ) type ParentDeathSignal int func (p ParentDeathSignal) Restore() error { if p == 0 { return nil } current, err := GetParentDeathSignal() if err != nil { return err } if p == current { return nil } return p.Set() } func (p ParentDeathSignal) Set() error { return SetParentDeathSignal(uintptr(p)) } func Execv(cmd string, args []string, env []string) error { name, err := exec.LookPath(cmd) if err != nil { return err } return syscall.Exec(name, args, env) } func SetParentDeathSignal(sig uintptr) error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 { return err } return nil } func GetParentDeathSignal() (ParentDeathSignal, error) { var sig int _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0) if err != 0 { return -1, err } return ParentDeathSignal(sig), nil } func SetKeepCaps() error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 { return err } return nil } func ClearKeepCaps() error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 { return err } return nil } func Setctty() error { if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 { return err } return nil } 
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/proc.go0000644000175000017500000000142212524212370022773 0ustar tianontianonpackage system import ( "io/ioutil" "path/filepath" "strconv" "strings" ) // look in /proc to find the process start time so that we can verify // that this pid has started after ourself func GetProcessStartTime(pid int) (string, error) { data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) if err != nil { return "", err } parts := strings.Split(string(data), " ") // the starttime is located at pos 22 // from the man page // // starttime %llu (was %lu before Linux 2.6) // (22) The time the process started after system boot. In kernels before Linux 2.6, this // value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks // (divide by sysconf(_SC_CLK_TCK)). return parts[22-1], nil // starts at 1 } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/sysconfig_notcgo.go0000644000175000017500000000023012524212370025401 0ustar tianontianon// +build !cgo package system func GetClockTicks() int { // TODO figure out a better alternative for platforms where we're missing cgo return 100 } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/syscall_linux_386.go0000644000175000017500000000074612524212370025331 0ustar tianontianon// +build linux,386 package system import ( "syscall" ) // Setuid sets the uid of the calling thread to the specified uid. func Setuid(uid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) if e1 != 0 { err = e1 } return } // Setgid sets the gid of the calling thread to the specified gid. 
func Setgid(gid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0) if e1 != 0 { err = e1 } return } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/setns_linux.go0000644000175000017500000000150312524212370024403 0ustar tianontianonpackage system import ( "fmt" "runtime" "syscall" ) // Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 // // We need different setns values for the different platforms and arch // We are declaring the macro here because the SETNS syscall does not exist in th stdlib var setNsMap = map[string]uintptr{ "linux/386": 346, "linux/amd64": 308, "linux/arm": 374, "linux/ppc64": 350, "linux/ppc64le": 350, "linux/s390x": 339, } func Setns(fd uintptr, flags uintptr) error { ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] if !exists { return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) } _, _, err := syscall.RawSyscall(ns, fd, flags, 0) if err != 0 { return err } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/syscall_linux_64.go0000644000175000017500000000101512524212370025230 0ustar tianontianon// +build linux,amd64 linux,ppc64 linux,ppc64le linux,s390x package system import ( "syscall" ) // Setuid sets the uid of the calling thread to the specified uid. func Setuid(uid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) if e1 != 0 { err = e1 } return } // Setgid sets the gid of the calling thread to the specified gid. 
func Setgid(gid int) (err error) { _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0) if e1 != 0 { err = e1 } return } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/system/xattrs_linux.go0000644000175000017500000000545512524212370024606 0ustar tianontianonpackage system import ( "syscall" "unsafe" ) var _zero uintptr // Returns the size of xattrs and nil error // Requires path, takes allocated []byte or nil as last argument func Llistxattr(path string, dest []byte) (size int, err error) { pathBytes, err := syscall.BytePtrFromString(path) if err != nil { return -1, err } var newpathBytes unsafe.Pointer if len(dest) > 0 { newpathBytes = unsafe.Pointer(&dest[0]) } else { newpathBytes = unsafe.Pointer(&_zero) } _size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0) size = int(_size) if errno != 0 { return -1, errno } return size, nil } // Returns a []byte slice if the xattr is set and nil otherwise // Requires path and its attribute as arguments func Lgetxattr(path string, attr string) ([]byte, error) { var sz int pathBytes, err := syscall.BytePtrFromString(path) if err != nil { return nil, err } attrBytes, err := syscall.BytePtrFromString(attr) if err != nil { return nil, err } // Start with a 128 length byte array sz = 128 dest := make([]byte, sz) destBytes := unsafe.Pointer(&dest[0]) _sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) switch { case errno == syscall.ENODATA: return nil, errno case errno == syscall.ENOTSUP: return nil, errno case errno == syscall.ERANGE: // 128 byte array might just not be good enough, // A dummy buffer is used ``uintptr(0)`` to get real size // of the xattrs on disk _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), 
uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0) sz = int(_sz) if sz < 0 { return nil, errno } dest = make([]byte, sz) destBytes := unsafe.Pointer(&dest[0]) _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) if errno != 0 { return nil, errno } case errno != 0: return nil, errno } sz = int(_sz) return dest[:sz], nil } func Lsetxattr(path string, attr string, data []byte, flags int) error { pathBytes, err := syscall.BytePtrFromString(path) if err != nil { return err } attrBytes, err := syscall.BytePtrFromString(attr) if err != nil { return err } var dataBytes unsafe.Pointer if len(data) > 0 { dataBytes = unsafe.Pointer(&data[0]) } else { dataBytes = unsafe.Pointer(&_zero) } _, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0) if errno != 0 { return errno } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/notify_linux_test.go0000644000175000017500000000437612524212370024305 0ustar tianontianon// +build linux package libcontainer import ( "encoding/binary" "fmt" "io/ioutil" "os" "path/filepath" "syscall" "testing" "time" ) func TestNotifyOnOOM(t *testing.T) { memoryPath, err := ioutil.TempDir("", "testnotifyoom-") if err != nil { t.Fatal(err) } oomPath := filepath.Join(memoryPath, "memory.oom_control") eventPath := filepath.Join(memoryPath, "cgroup.event_control") if err := ioutil.WriteFile(oomPath, []byte{}, 0700); err != nil { t.Fatal(err) } if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil { t.Fatal(err) } var eventFd, oomControlFd int paths := map[string]string{ "memory": memoryPath, } ooms, err := notifyOnOOM(paths) if err != nil { t.Fatal("expected no error, got:", err) } data, err := ioutil.ReadFile(eventPath) if err != nil { t.Fatal("couldn't read event control file:", err) } 
if _, err := fmt.Sscanf(string(data), "%d %d", &eventFd, &oomControlFd); err != nil { t.Fatalf("invalid control data %q: %s", data, err) } // re-open the eventfd efd, err := syscall.Dup(eventFd) if err != nil { t.Fatal("unable to reopen eventfd:", err) } defer syscall.Close(efd) if err != nil { t.Fatal("unable to dup event fd:", err) } buf := make([]byte, 8) binary.LittleEndian.PutUint64(buf, 1) if _, err := syscall.Write(efd, buf); err != nil { t.Fatal("unable to write to eventfd:", err) } select { case <-ooms: case <-time.After(100 * time.Millisecond): t.Fatal("no notification on oom channel after 100ms") } // simulate what happens when a cgroup is destroyed by cleaning up and then // writing to the eventfd. if err := os.RemoveAll(memoryPath); err != nil { t.Fatal(err) } if _, err := syscall.Write(efd, buf); err != nil { t.Fatal("unable to write to eventfd:", err) } // give things a moment to shut down select { case _, ok := <-ooms: if ok { t.Fatal("expected no oom to be triggered") } case <-time.After(100 * time.Millisecond): } if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(oomControlFd), syscall.F_GETFD, 0); err != syscall.EBADF { t.Error("expected oom control to be closed") } if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(eventFd), syscall.F_GETFD, 0); err != syscall.EBADF { t.Error("expected event fd to be closed") } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/docs/0000755000175000017500000000000012524212370021106 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/docs/man/0000755000175000017500000000000012524212370021661 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/docs/man/nsinit.1.md0000644000175000017500000000231012524212370023642 0ustar tianontianon% nsinit User Manual % docker/libcontainer % JAN 2015 NAME: nsinit - A low-level utility for managing containers. It is used to spawn new containers or join existing containers. 
USAGE: nsinit [global options] command [command options] [arguments...] VERSION: 0.1 COMMANDS: config display the container configuration exec execute a new command inside a container init runs the init process inside the namespace oom display oom notifications for a container pause pause the container's processes stats display statistics for the container unpause unpause the container's processes help, h shows a list of commands or help for one command EXAMPLES: Get the of an already running docker container. `sudo docker ps` will return the list of all the running containers. take the (e.g. 4addb0b2d307) and go to its config directory `/var/lib/docker/execdriver/native/4addb0b2d307` and here you can run the nsinit command line utility. e.g. `nsinit exec /bin/bash` will start a shell on the already running container. # HISTORY Jan 2015, Originally compiled by Shishir Mahajan (shishir dot mahajan at redhat dot com) based on nsinit source material and internal work. libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/rootfs_linux.go0000644000175000017500000002621412524212370023245 0ustar tianontianon// +build linux package libcontainer import ( "fmt" "io/ioutil" "os" "path/filepath" "strings" "syscall" "time" "github.com/docker/docker/pkg/symlink" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" ) const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV // setupRootfs sets up the devices, mount points, and filesystems for use inside a // new mount namespace. 
func setupRootfs(config *configs.Config, console *linuxConsole) (err error) { if err := prepareRoot(config); err != nil { return newSystemError(err) } for _, m := range config.Mounts { if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil { return newSystemError(err) } } if err := createDevices(config); err != nil { return newSystemError(err) } if err := setupPtmx(config, console); err != nil { return newSystemError(err) } if err := setupDevSymlinks(config.Rootfs); err != nil { return newSystemError(err) } if err := syscall.Chdir(config.Rootfs); err != nil { return newSystemError(err) } if config.NoPivotRoot { err = msMoveRoot(config.Rootfs) } else { err = pivotRoot(config.Rootfs, config.PivotDir) } if err != nil { return newSystemError(err) } if err := reOpenDevNull(config.Rootfs); err != nil { return newSystemError(err) } if config.Readonlyfs { if err := setReadonly(); err != nil { return newSystemError(err) } } syscall.Umask(0022) return nil } func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { var ( dest = m.Destination data = label.FormatMountLabel(m.Data, mountLabel) ) if !strings.HasPrefix(dest, rootfs) { dest = filepath.Join(rootfs, dest) } switch m.Device { case "proc", "mqueue", "sysfs": if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) { return err } return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), "") case "tmpfs": stat, err := os.Stat(dest) if err != nil { if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) { return err } } if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil { return err } if stat != nil { if err = os.Chmod(dest, stat.Mode()); err != nil { return err } } return nil case "devpts": if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) { return err } return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data) case "bind": stat, err := os.Stat(m.Source) if err != nil { // error out if the source of a 
bind mount does not exist as we will be // unable to bind anything to it. return err } // ensure that the destination of the bind mount is resolved of symlinks at mount time because // any previous mounts can invalidate the next mount's destination. // this can happen when a user specifies mounts within other mounts to cause breakouts or other // evil stuff to try to escape the container's rootfs. if dest, err = symlink.FollowSymlinkInScope(filepath.Join(rootfs, m.Destination), rootfs); err != nil { return err } if err := checkMountDestination(rootfs, dest); err != nil { return err } if err := createIfNotExists(dest, stat.IsDir()); err != nil { return err } if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil { return err } if m.Flags&syscall.MS_RDONLY != 0 { if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil { return err } } if m.Relabel != "" { if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil { return err } } if m.Flags&syscall.MS_PRIVATE != 0 { if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil { return err } } default: return fmt.Errorf("unknown mount device %q to %q", m.Device, m.Destination) } return nil } // checkMountDestination checks to ensure that the mount destination is not over the // top of /proc or /sys. // dest is required to be an abs path and have any symlinks resolved before calling this function. func checkMountDestination(rootfs, dest string) error { if filepath.Clean(rootfs) == filepath.Clean(dest) { return fmt.Errorf("mounting into / is prohibited") } invalidDestinations := []string{ "/proc", } for _, invalid := range invalidDestinations { path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest) if err != nil { return err } if path == "." 
|| !strings.HasPrefix(path, "..") { return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid) } } return nil } func setupDevSymlinks(rootfs string) error { var links = [][2]string{ {"/proc/self/fd", "/dev/fd"}, {"/proc/self/fd/0", "/dev/stdin"}, {"/proc/self/fd/1", "/dev/stdout"}, {"/proc/self/fd/2", "/dev/stderr"}, } // kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink // in /dev if it exists in /proc. if _, err := os.Stat("/proc/kcore"); err == nil { links = append(links, [2]string{"/proc/kcore", "/dev/kcore"}) } for _, link := range links { var ( src = link[0] dst = filepath.Join(rootfs, link[1]) ) if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) { return fmt.Errorf("symlink %s %s %s", src, dst, err) } } return nil } // If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs // this method will make them point to `/dev/null` in this container's rootfs. This // needs to be called after we chroot/pivot into the container's rootfs so that any // symlinks are resolved locally. func reOpenDevNull(rootfs string) error { var stat, devNullStat syscall.Stat_t file, err := os.Open("/dev/null") if err != nil { return fmt.Errorf("Failed to open /dev/null - %s", err) } defer file.Close() if err := syscall.Fstat(int(file.Fd()), &devNullStat); err != nil { return err } for fd := 0; fd < 3; fd++ { if err := syscall.Fstat(fd, &stat); err != nil { return err } if stat.Rdev == devNullStat.Rdev { // Close and re-open the fd. if err := syscall.Dup2(int(file.Fd()), fd); err != nil { return err } } } return nil } // Create the device nodes in the container. func createDevices(config *configs.Config) error { oldMask := syscall.Umask(0000) for _, node := range config.Devices { // containers running in a user namespace are not allowed to mknod // devices so we can just bind mount it from the host. 
if err := createDeviceNode(config.Rootfs, node, config.Namespaces.Contains(configs.NEWUSER)); err != nil { syscall.Umask(oldMask) return err } } syscall.Umask(oldMask) return nil } // Creates the device node in the rootfs of the container. func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { dest := filepath.Join(rootfs, node.Path) if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { return err } if bind { f, err := os.Create(dest) if err != nil && !os.IsExist(err) { return err } if f != nil { f.Close() } return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "") } if err := mknodDevice(dest, node); err != nil { if os.IsExist(err) { return nil } return err } return nil } func mknodDevice(dest string, node *configs.Device) error { fileMode := node.FileMode switch node.Type { case 'c': fileMode |= syscall.S_IFCHR case 'b': fileMode |= syscall.S_IFBLK default: return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) } if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil { return err } return syscall.Chown(dest, int(node.Uid), int(node.Gid)) } func prepareRoot(config *configs.Config) error { flag := syscall.MS_PRIVATE | syscall.MS_REC if config.NoPivotRoot { flag = syscall.MS_SLAVE | syscall.MS_REC } if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil { return err } return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "") } func setReadonly() error { return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") } func setupPtmx(config *configs.Config, console *linuxConsole) error { ptmx := filepath.Join(config.Rootfs, "dev/ptmx") if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err } if err := os.Symlink("pts/ptmx", ptmx); err != nil { return fmt.Errorf("symlink dev ptmx %s", err) } if console != nil { return console.mount(config.Rootfs, 
config.MountLabel, 0, 0) } return nil } func pivotRoot(rootfs, pivotBaseDir string) error { if pivotBaseDir == "" { pivotBaseDir = "/" } tmpDir := filepath.Join(rootfs, pivotBaseDir) if err := os.MkdirAll(tmpDir, 0755); err != nil { return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err) } pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root") if err != nil { return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err) } if err := syscall.PivotRoot(rootfs, pivotDir); err != nil { return fmt.Errorf("pivot_root %s", err) } if err := syscall.Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } // path to pivot dir now changed, update pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { return fmt.Errorf("unmount pivot_root dir %s", err) } return os.Remove(pivotDir) } func msMoveRoot(rootfs string) error { if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { return err } if err := syscall.Chroot("."); err != nil { return err } return syscall.Chdir("/") } // createIfNotExists creates a file or a directory only if it does not already exist. func createIfNotExists(path string, isDir bool) error { if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { if isDir { return os.MkdirAll(path, 0755) } if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { return err } f, err := os.OpenFile(path, os.O_CREATE, 0755) if err != nil { return err } f.Close() } } return nil } // remountReadonly will bind over the top of an existing path and ensure that it is read-only. 
func remountReadonly(path string) error { for i := 0; i < 5; i++ { if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) { switch err { case syscall.EINVAL: // Probably not a mountpoint, use bind-mount if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil { return err } return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "") case syscall.EBUSY: time.Sleep(100 * time.Millisecond) continue default: return err } } return nil } return fmt.Errorf("unable to mount %s as readonly max retries reached", path) } // maskFile bind mounts /dev/null over the top of the specified path inside a container // to avoid security issues from processes reading information from non-namespace aware mounts ( proc/kcore ). func maskFile(path string) error { if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { return err } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/factory.go0000644000175000017500000000322112524212370022152 0ustar tianontianonpackage libcontainer import ( "github.com/docker/libcontainer/configs" ) type Factory interface { // Creates a new container with the given id and starts the initial process inside it. // id must be a string containing only letters, digits and underscores and must contain // between 1 and 1024 characters, inclusive. // // The id must not already be in use by an existing container. Containers created using // a factory with the same path (and file system) must have distinct ids. // // Returns the new container with a running process. // // errors: // IdInUse - id is already in use by a container // InvalidIdFormat - id has incorrect format // ConfigInvalid - config is invalid // Systemerror - System error // // On error, any partially created container parts are cleaned up (the operation is atomic). 
Create(id string, config *configs.Config) (Container, error) // Load takes an ID for an existing container and returns the container information // from the state. This presents a read only view of the container. // // errors: // Path does not exist // Container is stopped // System error Load(id string) (Container, error) // StartInitialization is an internal API to libcontainer used during the rexec of the // container. pipefd is the fd to the child end of the pipe used to syncronize the // parent and child process providing state and configuration to the child process and // returning any errors during the init of the container // // Errors: // pipe connection error // system error StartInitialization(pipefd uintptr) error // Type returns info string about factory type (e.g. lxc, libcontainer...) Type() string } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/NOTICE0000644000175000017500000000101512524212370021057 0ustar tianontianonlibcontainer Copyright 2012-2015 Docker, Inc. This product includes software developed at Docker, Inc. (http://www.docker.com). The following is courtesy of our legal counsel: Use and transfer of Docker may be subject to certain restrictions by the United States and other governments. It is your responsibility to ensure that your use and/or transfer does not violate applicable laws. For more information, please see http://www.bis.doc.gov See also http://www.apache.org/dev/crypto.html and/or seek legal counsel. libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/setns_init_linux.go0000644000175000017500000000151112524212370024101 0ustar tianontianon// +build linux package libcontainer import ( "os" "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/system" ) // linuxSetnsInit performs the container's initialization for running a new process // inside an existing container. 
type linuxSetnsInit struct { config *initConfig } func (l *linuxSetnsInit) Init() error { if err := setupRlimits(l.config.Config); err != nil { return err } if err := finalizeNamespace(l.config); err != nil { return err } if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } if l.config.Config.ProcessLabel != "" { if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/.gitignore0000644000175000017500000000002612524212370022144 0ustar tianontianonbundles nsinit/nsinit libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/selinux/0000755000175000017500000000000012524212370021645 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/selinux/selinux.go0000644000175000017500000002415012524212370023665 0ustar tianontianon// +build linux package selinux import ( "bufio" "crypto/rand" "encoding/binary" "fmt" "io" "os" "path/filepath" "regexp" "strconv" "strings" "syscall" "github.com/docker/docker/pkg/mount" "github.com/docker/libcontainer/system" ) const ( Enforcing = 1 Permissive = 0 Disabled = -1 selinuxDir = "/etc/selinux/" selinuxConfig = selinuxDir + "config" selinuxTypeTag = "SELINUXTYPE" selinuxTag = "SELINUX" selinuxPath = "/sys/fs/selinux" xattrNameSelinux = "security.selinux" stRdOnly = 0x01 ) var ( assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`) spaceRegex = regexp.MustCompile(`^([^=]+) (.*)$`) mcsList = make(map[string]bool) selinuxfs = "unknown" selinuxEnabled = false // Stores whether selinux is currently enabled selinuxEnabledChecked = false // Stores whether selinux enablement has been checked or established yet ) type SELinuxContext map[string]string // SetDisabled disables selinux support for the package func SetDisabled() { selinuxEnabled, selinuxEnabledChecked = false, true } // getSelinuxMountPoint returns the path to the mountpoint 
of an selinuxfs // filesystem or an empty string if no mountpoint is found. Selinuxfs is // a proc-like pseudo-filesystem that exposes the selinux policy API to // processes. The existence of an selinuxfs mount is used to determine // whether selinux is currently enabled or not. func getSelinuxMountPoint() string { if selinuxfs != "unknown" { return selinuxfs } selinuxfs = "" mounts, err := mount.GetMounts() if err != nil { return selinuxfs } for _, mount := range mounts { if mount.Fstype == "selinuxfs" { selinuxfs = mount.Mountpoint break } } if selinuxfs != "" { var buf syscall.Statfs_t syscall.Statfs(selinuxfs, &buf) if (buf.Flags & stRdOnly) == 1 { selinuxfs = "" } } return selinuxfs } // SelinuxEnabled returns whether selinux is currently enabled. func SelinuxEnabled() bool { if selinuxEnabledChecked { return selinuxEnabled } selinuxEnabledChecked = true if fs := getSelinuxMountPoint(); fs != "" { if con, _ := Getcon(); con != "kernel" { selinuxEnabled = true } } return selinuxEnabled } func readConfig(target string) (value string) { var ( val, key string bufin *bufio.Reader ) in, err := os.Open(selinuxConfig) if err != nil { return "" } defer in.Close() bufin = bufio.NewReader(in) for done := false; !done; { var line string if line, err = bufin.ReadString('\n'); err != nil { if err != io.EOF { return "" } done = true } line = strings.TrimSpace(line) if len(line) == 0 { // Skip blank lines continue } if line[0] == ';' || line[0] == '#' { // Skip comments continue } if groups := assignRegex.FindStringSubmatch(line); groups != nil { key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) if key == target { return strings.Trim(val, "\"") } } } return "" } func getSELinuxPolicyRoot() string { return selinuxDir + readConfig(selinuxTypeTag) } func readCon(name string) (string, error) { var val string in, err := os.Open(name) if err != nil { return "", err } defer in.Close() _, err = fmt.Fscanf(in, "%s", &val) return val, err } // Setfilecon sets the 
SELinux label for this path or returns an error. func Setfilecon(path string, scon string) error { return system.Lsetxattr(path, xattrNameSelinux, []byte(scon), 0) } // Getfilecon returns the SELinux label for this path or returns an error. func Getfilecon(path string) (string, error) { con, err := system.Lgetxattr(path, xattrNameSelinux) // Trim the NUL byte at the end of the byte buffer, if present. if con[len(con)-1] == '\x00' { con = con[:len(con)-1] } return string(con), err } func Setfscreatecon(scon string) error { return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), scon) } func Getfscreatecon() (string, error) { return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid())) } // Getcon returns the SELinux label of the current process thread, or an error. func Getcon() (string, error) { return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid())) } // Getpidcon returns the SELinux label of the given pid, or an error. 
func Getpidcon(pid int) (string, error) { return readCon(fmt.Sprintf("/proc/%d/attr/current", pid)) } func Getexeccon() (string, error) { return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid())) } func writeCon(name string, val string) error { out, err := os.OpenFile(name, os.O_WRONLY, 0) if err != nil { return err } defer out.Close() if val != "" { _, err = out.Write([]byte(val)) } else { _, err = out.Write(nil) } return err } func Setexeccon(scon string) error { return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), scon) } func (c SELinuxContext) Get() string { return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"]) } func NewContext(scon string) SELinuxContext { c := make(SELinuxContext) if len(scon) != 0 { con := strings.SplitN(scon, ":", 4) c["user"] = con[0] c["role"] = con[1] c["type"] = con[2] c["level"] = con[3] } return c } func ReserveLabel(scon string) { if len(scon) != 0 { con := strings.SplitN(scon, ":", 4) mcsAdd(con[3]) } } func SelinuxGetEnforce() int { var enforce int enforceS, err := readCon(fmt.Sprintf("%s/enforce", selinuxPath)) if err != nil { return -1 } enforce, err = strconv.Atoi(string(enforceS)) if err != nil { return -1 } return enforce } func SelinuxGetEnforceMode() int { switch readConfig(selinuxTag) { case "enforcing": return Enforcing case "permissive": return Permissive } return Disabled } func mcsAdd(mcs string) error { if mcsList[mcs] { return fmt.Errorf("MCS Label already exists") } mcsList[mcs] = true return nil } func mcsDelete(mcs string) { mcsList[mcs] = false } func mcsExists(mcs string) bool { return mcsList[mcs] } func IntToMcs(id int, catRange uint32) string { var ( SETSIZE = int(catRange) TIER = SETSIZE ORD = id ) if id < 1 || id > 523776 { return "" } for ORD > TIER { ORD = ORD - TIER TIER -= 1 } TIER = SETSIZE - TIER ORD = ORD + TIER return fmt.Sprintf("s0:c%d,c%d", TIER, ORD) } func uniqMcs(catRange uint32) string { var ( n uint32 c1, c2 uint32 mcs 
string ) for { binary.Read(rand.Reader, binary.LittleEndian, &n) c1 = n % catRange binary.Read(rand.Reader, binary.LittleEndian, &n) c2 = n % catRange if c1 == c2 { continue } else { if c1 > c2 { t := c1 c1 = c2 c2 = t } } mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2) if err := mcsAdd(mcs); err != nil { continue } break } return mcs } func FreeLxcContexts(scon string) { if len(scon) != 0 { con := strings.SplitN(scon, ":", 4) mcsDelete(con[3]) } } func GetLxcContexts() (processLabel string, fileLabel string) { var ( val, key string bufin *bufio.Reader ) if !SelinuxEnabled() { return "", "" } lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot()) in, err := os.Open(lxcPath) if err != nil { return "", "" } defer in.Close() bufin = bufio.NewReader(in) for done := false; !done; { var line string if line, err = bufin.ReadString('\n'); err != nil { if err == io.EOF { done = true } else { goto exit } } line = strings.TrimSpace(line) if len(line) == 0 { // Skip blank lines continue } if line[0] == ';' || line[0] == '#' { // Skip comments continue } if groups := assignRegex.FindStringSubmatch(line); groups != nil { key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) if key == "process" { processLabel = strings.Trim(val, "\"") } if key == "file" { fileLabel = strings.Trim(val, "\"") } } } if processLabel == "" || fileLabel == "" { return "", "" } exit: // mcs := IntToMcs(os.Getpid(), 1024) mcs := uniqMcs(1024) scon := NewContext(processLabel) scon["level"] = mcs processLabel = scon.Get() scon = NewContext(fileLabel) scon["level"] = mcs fileLabel = scon.Get() return processLabel, fileLabel } func SecurityCheckContext(val string) error { return writeCon(fmt.Sprintf("%s.context", selinuxPath), val) } func CopyLevel(src, dest string) (string, error) { if src == "" { return "", nil } if err := SecurityCheckContext(src); err != nil { return "", err } if err := SecurityCheckContext(dest); err != nil { return "", err } scon := NewContext(src) tcon := 
NewContext(dest) mcsDelete(tcon["level"]) mcsAdd(scon["level"]) tcon["level"] = scon["level"] return tcon.Get(), nil } // Prevent users from relabing system files func badPrefix(fpath string) error { var badprefixes = []string{"/usr"} for _, prefix := range badprefixes { if fpath == prefix || strings.HasPrefix(fpath, fmt.Sprintf("%s/", prefix)) { return fmt.Errorf("Relabeling content in %s is not allowed.", prefix) } } return nil } // Change the fpath file object to the SELinux label scon. // If the fpath is a directory and recurse is true Chcon will walk the // directory tree setting the label func Chcon(fpath string, scon string, recurse bool) error { if scon == "" { return nil } if err := badPrefix(fpath); err != nil { return err } callback := func(p string, info os.FileInfo, err error) error { return Setfilecon(p, scon) } if recurse { return filepath.Walk(fpath, callback) } return Setfilecon(fpath, scon) } // DupSecOpt takes an SELinux process label and returns security options that // can will set the SELinux Type and Level for future container processes func DupSecOpt(src string) []string { if src == "" { return nil } con := NewContext(src) if con["user"] == "" || con["role"] == "" || con["type"] == "" || con["level"] == "" { return nil } return []string{"label:user:" + con["user"], "label:role:" + con["role"], "label:type:" + con["type"], "label:level:" + con["level"]} } // DisableSecOpt returns a security opt that can be used to disabling SELinux // labeling support for future container processes func DisableSecOpt() []string { return []string{"label:disable"} } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/selinux/selinux_test.go0000644000175000017500000000256112524212370024726 0ustar tianontianon// +build linux package selinux_test import ( "os" "testing" "github.com/docker/libcontainer/selinux" ) func testSetfilecon(t *testing.T) { if selinux.SelinuxEnabled() { tmp := "selinux_test" out, _ := os.OpenFile(tmp, os.O_WRONLY, 0) out.Close() err := 
selinux.Setfilecon(tmp, "system_u:object_r:bin_t:s0") if err != nil { t.Log("Setfilecon failed") t.Fatal(err) } os.Remove(tmp) } } func TestSELinux(t *testing.T) { var ( err error plabel, flabel string ) if selinux.SelinuxEnabled() { t.Log("Enabled") plabel, flabel = selinux.GetLxcContexts() t.Log(plabel) t.Log(flabel) selinux.FreeLxcContexts(plabel) plabel, flabel = selinux.GetLxcContexts() t.Log(plabel) t.Log(flabel) selinux.FreeLxcContexts(plabel) t.Log("getenforce ", selinux.SelinuxGetEnforce()) t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode()) pid := os.Getpid() t.Logf("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023)) err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0") if err == nil { t.Log(selinux.Getfscreatecon()) } else { t.Log("setfscreatecon failed", err) t.Fatal(err) } err = selinux.Setfscreatecon("") if err == nil { t.Log(selinux.Getfscreatecon()) } else { t.Log("setfscreatecon failed", err) t.Fatal(err) } t.Log(selinux.Getpidcon(1)) } else { t.Log("Disabled") } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/stacktrace/0000755000175000017500000000000012524212370022302 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/stacktrace/frame.go0000644000175000017500000000127012524212370023723 0ustar tianontianonpackage stacktrace import ( "path/filepath" "runtime" "strings" ) // NewFrame returns a new stack frame for the provided information func NewFrame(pc uintptr, file string, line int) Frame { fn := runtime.FuncForPC(pc) pack, name := parseFunctionName(fn.Name()) return Frame{ Line: line, File: filepath.Base(file), Package: pack, Function: name, } } func parseFunctionName(name string) (string, string) { i := strings.LastIndex(name, ".") if i == -1 { return "", name } return name[:i], name[i+1:] } // Frame contains all the information for a stack frame within a go program type Frame struct { File string Function string Package string Line int } 
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/stacktrace/capture.go0000644000175000017500000000071512524212370024277 0ustar tianontianonpackage stacktrace import "runtime" // Caputure captures a stacktrace for the current calling go program // // skip is the number of frames to skip func Capture(userSkip int) Stacktrace { var ( skip = userSkip + 1 // add one for our own function frames []Frame ) for i := skip; ; i++ { pc, file, line, ok := runtime.Caller(i) if !ok { break } frames = append(frames, NewFrame(pc, file, line)) } return Stacktrace{ Frames: frames, } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/stacktrace/frame_test.go0000644000175000017500000000103112524212370024755 0ustar tianontianonpackage stacktrace import "testing" func TestParsePackageName(t *testing.T) { var ( name = "github.com/docker/libcontainer/stacktrace.captureFunc" expectedPackage = "github.com/docker/libcontainer/stacktrace" expectedFunction = "captureFunc" ) pack, funcName := parseFunctionName(name) if pack != expectedPackage { t.Fatalf("expected package %q but received %q", expectedPackage, pack) } if funcName != expectedFunction { t.Fatalf("expected function %q but received %q", expectedFunction, funcName) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/stacktrace/stacktrace.go0000644000175000017500000000007712524212370024761 0ustar tianontianonpackage stacktrace type Stacktrace struct { Frames []Frame } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/stacktrace/capture_test.go0000644000175000017500000000134612524212370025337 0ustar tianontianonpackage stacktrace import "testing" func captureFunc() Stacktrace { return Capture(0) } func TestCaptureTestFunc(t *testing.T) { stack := captureFunc() if len(stack.Frames) == 0 { t.Fatal("expected stack frames to be returned") } // the first frame is the caller frame := stack.Frames[0] if expected := "captureFunc"; frame.Function != expected { t.Fatalf("expteced function %q but recevied %q", expected, 
frame.Function) } if expected := "github.com/docker/libcontainer/stacktrace"; frame.Package != expected { t.Fatalf("expected package %q but received %q", expected, frame.Package) } if expected := "capture_test.go"; frame.File != expected { t.Fatalf("expected file %q but received %q", expected, frame.File) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/netlink/0000755000175000017500000000000012524212370021622 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/netlink/netlink_linux_notarm.go0000644000175000017500000000012312524212370026410 0ustar tianontianon// +build !arm package netlink func ifrDataByte(b byte) int8 { return int8(b) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/netlink/netlink.go0000644000175000017500000000125712524212370023622 0ustar tianontianon// Packet netlink provide access to low level Netlink sockets and messages. // // Actual implementations are in: // netlink_linux.go // netlink_darwin.go package netlink import ( "errors" "net" ) var ( ErrWrongSockType = errors.New("Wrong socket type") ErrShortResponse = errors.New("Got short response from netlink") ErrInterfaceExists = errors.New("Network interface already exists") ) // A Route is a subnet associated with the interface to reach it. type Route struct { *net.IPNet Iface *net.Interface Default bool } // An IfAddr defines IP network settings for a given network interface type IfAddr struct { Iface *net.Interface IP net.IP IPNet *net.IPNet } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/netlink/MAINTAINERS0000644000175000017500000000016212524212370023316 0ustar tianontianonMichael Crosby (@crosbymichael) Guillaume J. 
Charmes (@creack) libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/netlink/netlink_linux.go0000644000175000017500000007323112524212370025042 0ustar tianontianonpackage netlink import ( "encoding/binary" "fmt" "io" "math/rand" "net" "os" "sync/atomic" "syscall" "unsafe" ) const ( IFNAMSIZ = 16 DEFAULT_CHANGE = 0xFFFFFFFF IFLA_INFO_KIND = 1 IFLA_INFO_DATA = 2 VETH_INFO_PEER = 1 IFLA_MACVLAN_MODE = 1 IFLA_VLAN_ID = 1 IFLA_NET_NS_FD = 28 IFLA_ADDRESS = 1 IFLA_BRPORT_MODE = 4 SIOC_BRADDBR = 0x89a0 SIOC_BRDELBR = 0x89a1 SIOC_BRADDIF = 0x89a2 ) const ( MACVLAN_MODE_PRIVATE = 1 << iota MACVLAN_MODE_VEPA MACVLAN_MODE_BRIDGE MACVLAN_MODE_PASSTHRU ) var nextSeqNr uint32 type ifreqHwaddr struct { IfrnName [IFNAMSIZ]byte IfruHwaddr syscall.RawSockaddr } type ifreqIndex struct { IfrnName [IFNAMSIZ]byte IfruIndex int32 } type ifreqFlags struct { IfrnName [IFNAMSIZ]byte Ifruflags uint16 } var native binary.ByteOrder func init() { var x uint32 = 0x01020304 if *(*byte)(unsafe.Pointer(&x)) == 0x01 { native = binary.BigEndian } else { native = binary.LittleEndian } } func getIpFamily(ip net.IP) int { if len(ip) <= net.IPv4len { return syscall.AF_INET } if ip.To4() != nil { return syscall.AF_INET } return syscall.AF_INET6 } type NetlinkRequestData interface { Len() int ToWireFormat() []byte } type IfInfomsg struct { syscall.IfInfomsg } func newIfInfomsg(family int) *IfInfomsg { return &IfInfomsg{ IfInfomsg: syscall.IfInfomsg{ Family: uint8(family), }, } } func newIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { msg := newIfInfomsg(family) parent.children = append(parent.children, msg) return msg } func (msg *IfInfomsg) ToWireFormat() []byte { length := syscall.SizeofIfInfomsg b := make([]byte, length) b[0] = msg.Family b[1] = 0 native.PutUint16(b[2:4], msg.Type) native.PutUint32(b[4:8], uint32(msg.Index)) native.PutUint32(b[8:12], msg.Flags) native.PutUint32(b[12:16], msg.Change) return b } func (msg *IfInfomsg) Len() int { return syscall.SizeofIfInfomsg } type IfAddrmsg 
struct { syscall.IfAddrmsg } func newIfAddrmsg(family int) *IfAddrmsg { return &IfAddrmsg{ IfAddrmsg: syscall.IfAddrmsg{ Family: uint8(family), }, } } func (msg *IfAddrmsg) ToWireFormat() []byte { length := syscall.SizeofIfAddrmsg b := make([]byte, length) b[0] = msg.Family b[1] = msg.Prefixlen b[2] = msg.Flags b[3] = msg.Scope native.PutUint32(b[4:8], msg.Index) return b } func (msg *IfAddrmsg) Len() int { return syscall.SizeofIfAddrmsg } type RtMsg struct { syscall.RtMsg } func newRtMsg() *RtMsg { return &RtMsg{ RtMsg: syscall.RtMsg{ Table: syscall.RT_TABLE_MAIN, Scope: syscall.RT_SCOPE_UNIVERSE, Protocol: syscall.RTPROT_BOOT, Type: syscall.RTN_UNICAST, }, } } func (msg *RtMsg) ToWireFormat() []byte { length := syscall.SizeofRtMsg b := make([]byte, length) b[0] = msg.Family b[1] = msg.Dst_len b[2] = msg.Src_len b[3] = msg.Tos b[4] = msg.Table b[5] = msg.Protocol b[6] = msg.Scope b[7] = msg.Type native.PutUint32(b[8:12], msg.Flags) return b } func (msg *RtMsg) Len() int { return syscall.SizeofRtMsg } func rtaAlignOf(attrlen int) int { return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1) } type RtAttr struct { syscall.RtAttr Data []byte children []NetlinkRequestData } func newRtAttr(attrType int, data []byte) *RtAttr { return &RtAttr{ RtAttr: syscall.RtAttr{ Type: uint16(attrType), }, children: []NetlinkRequestData{}, Data: data, } } func newRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr { attr := newRtAttr(attrType, data) parent.children = append(parent.children, attr) return attr } func (a *RtAttr) Len() int { if len(a.children) == 0 { return (syscall.SizeofRtAttr + len(a.Data)) } l := 0 for _, child := range a.children { l += child.Len() } l += syscall.SizeofRtAttr return rtaAlignOf(l + len(a.Data)) } func (a *RtAttr) ToWireFormat() []byte { length := a.Len() buf := make([]byte, rtaAlignOf(length)) if a.Data != nil { copy(buf[4:], a.Data) } else { next := 4 for _, child := range a.children { childBuf := child.ToWireFormat() 
copy(buf[next:], childBuf) next += rtaAlignOf(len(childBuf)) } } if l := uint16(length); l != 0 { native.PutUint16(buf[0:2], l) } native.PutUint16(buf[2:4], a.Type) return buf } func uint32Attr(t int, n uint32) *RtAttr { buf := make([]byte, 4) native.PutUint32(buf, n) return newRtAttr(t, buf) } type NetlinkRequest struct { syscall.NlMsghdr Data []NetlinkRequestData } func (rr *NetlinkRequest) ToWireFormat() []byte { length := rr.Len dataBytes := make([][]byte, len(rr.Data)) for i, data := range rr.Data { dataBytes[i] = data.ToWireFormat() length += uint32(len(dataBytes[i])) } b := make([]byte, length) native.PutUint32(b[0:4], length) native.PutUint16(b[4:6], rr.Type) native.PutUint16(b[6:8], rr.Flags) native.PutUint32(b[8:12], rr.Seq) native.PutUint32(b[12:16], rr.Pid) next := 16 for _, data := range dataBytes { copy(b[next:], data) next += len(data) } return b } func (rr *NetlinkRequest) AddData(data NetlinkRequestData) { if data != nil { rr.Data = append(rr.Data, data) } } func newNetlinkRequest(proto, flags int) *NetlinkRequest { return &NetlinkRequest{ NlMsghdr: syscall.NlMsghdr{ Len: uint32(syscall.NLMSG_HDRLEN), Type: uint16(proto), Flags: syscall.NLM_F_REQUEST | uint16(flags), Seq: atomic.AddUint32(&nextSeqNr, 1), }, } } type NetlinkSocket struct { fd int lsa syscall.SockaddrNetlink } func getNetlinkSocket() (*NetlinkSocket, error) { fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_ROUTE) if err != nil { return nil, err } s := &NetlinkSocket{ fd: fd, } s.lsa.Family = syscall.AF_NETLINK if err := syscall.Bind(fd, &s.lsa); err != nil { syscall.Close(fd) return nil, err } return s, nil } func (s *NetlinkSocket) Close() { syscall.Close(s.fd) } func (s *NetlinkSocket) Send(request *NetlinkRequest) error { if err := syscall.Sendto(s.fd, request.ToWireFormat(), 0, &s.lsa); err != nil { return err } return nil } func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) { rb := make([]byte, syscall.Getpagesize()) nr, _, err := 
syscall.Recvfrom(s.fd, rb, 0) if err != nil { return nil, err } if nr < syscall.NLMSG_HDRLEN { return nil, ErrShortResponse } rb = rb[:nr] return syscall.ParseNetlinkMessage(rb) } func (s *NetlinkSocket) GetPid() (uint32, error) { lsa, err := syscall.Getsockname(s.fd) if err != nil { return 0, err } switch v := lsa.(type) { case *syscall.SockaddrNetlink: return v.Pid, nil } return 0, ErrWrongSockType } func (s *NetlinkSocket) CheckMessage(m syscall.NetlinkMessage, seq, pid uint32) error { if m.Header.Seq != seq { return fmt.Errorf("netlink: invalid seq %d, expected %d", m.Header.Seq, seq) } if m.Header.Pid != pid { return fmt.Errorf("netlink: wrong pid %d, expected %d", m.Header.Pid, pid) } if m.Header.Type == syscall.NLMSG_DONE { return io.EOF } if m.Header.Type == syscall.NLMSG_ERROR { e := int32(native.Uint32(m.Data[0:4])) if e == 0 { return io.EOF } return syscall.Errno(-e) } return nil } func (s *NetlinkSocket) HandleAck(seq uint32) error { pid, err := s.GetPid() if err != nil { return err } outer: for { msgs, err := s.Receive() if err != nil { return err } for _, m := range msgs { if err := s.CheckMessage(m, seq, pid); err != nil { if err == io.EOF { break outer } return err } } } return nil } func zeroTerminated(s string) []byte { return []byte(s + "\000") } func nonZeroTerminated(s string) []byte { return []byte(s) } // Add a new network link of a specified type. 
// This is identical to running: ip link add $name type $linkType func NetworkLinkAdd(name string, linkType string) error { if name == "" || linkType == "" { return fmt.Errorf("Neither link name nor link type can be empty!") } s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) wb.AddData(msg) linkInfo := newRtAttr(syscall.IFLA_LINKINFO, nil) newRtAttrChild(linkInfo, IFLA_INFO_KIND, nonZeroTerminated(linkType)) wb.AddData(linkInfo) nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name)) wb.AddData(nameData) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Delete a network link. // This is identical to running: ip link del $name func NetworkLinkDel(name string) error { if name == "" { return fmt.Errorf("Network link name can not be empty!") } s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() iface, err := net.InterfaceByName(name) if err != nil { return err } wb := newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Index = int32(iface.Index) wb.AddData(msg) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Bring up a particular network interface. // This is identical to running: ip link set dev $name up func NetworkLinkUp(iface *net.Interface) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Index = int32(iface.Index) msg.Flags = syscall.IFF_UP msg.Change = syscall.IFF_UP wb.AddData(msg) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Bring down a particular network interface. 
// This is identical to running: ip link set $name down func NetworkLinkDown(iface *net.Interface) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Index = int32(iface.Index) msg.Flags = 0 & ^syscall.IFF_UP msg.Change = DEFAULT_CHANGE wb.AddData(msg) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Set link layer address ie. MAC Address. // This is identical to running: ip link set dev $name address $macaddress func NetworkSetMacAddress(iface *net.Interface, macaddr string) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() hwaddr, err := net.ParseMAC(macaddr) if err != nil { return err } var ( MULTICAST byte = 0x1 ) if hwaddr[0]&0x1 == MULTICAST { return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr) } wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Index = int32(iface.Index) msg.Change = DEFAULT_CHANGE wb.AddData(msg) macdata := make([]byte, 6) copy(macdata, hwaddr) data := newRtAttr(IFLA_ADDRESS, macdata) wb.AddData(data) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Set link Maximum Transmission Unit // This is identical to running: ip link set dev $name mtu $MTU // bridge is a bitch here https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=292088 // https://bugzilla.redhat.com/show_bug.cgi?id=697021 // There is a discussion about how to deal with ifcs joining bridge with MTU > 1500 // Regular network nterfaces do seem to work though! 
func NetworkSetMTU(iface *net.Interface, mtu int) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Type = syscall.RTM_SETLINK msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(iface.Index) msg.Change = DEFAULT_CHANGE wb.AddData(msg) wb.AddData(uint32Attr(syscall.IFLA_MTU, uint32(mtu))) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Set link queue length // This is identical to running: ip link set dev $name txqueuelen $QLEN func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Type = syscall.RTM_SETLINK msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(iface.Index) msg.Change = DEFAULT_CHANGE wb.AddData(msg) wb.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen))) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Type = syscall.RTM_SETLINK msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(iface.Index) msg.Change = DEFAULT_CHANGE wb.AddData(msg) wb.AddData(rtattr) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Add an interface to bridge. 
// This is identical to running: ip link set $name master $master func NetworkSetMaster(iface, master *net.Interface) error { data := uint32Attr(syscall.IFLA_MASTER, uint32(master.Index)) return networkMasterAction(iface, data) } // Remove an interface from the bridge // This is is identical to to running: ip link $name set nomaster func NetworkSetNoMaster(iface *net.Interface) error { data := uint32Attr(syscall.IFLA_MASTER, 0) return networkMasterAction(iface, data) } func networkSetNsAction(iface *net.Interface, rtattr *RtAttr) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Index = int32(iface.Index) wb.AddData(msg) wb.AddData(rtattr) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Move a particular network interface to a particular network namespace // specified by PID. This is identical to running: ip link set dev $name netns $pid func NetworkSetNsPid(iface *net.Interface, nspid int) error { data := uint32Attr(syscall.IFLA_NET_NS_PID, uint32(nspid)) return networkSetNsAction(iface, data) } // Move a particular network interface to a particular mounted // network namespace specified by file descriptor. // This is idential to running: ip link set dev $name netns $fd func NetworkSetNsFd(iface *net.Interface, fd int) error { data := uint32Attr(IFLA_NET_NS_FD, uint32(fd)) return networkSetNsAction(iface, data) } // Rename a particular interface to a different name // !!! Note that you can't rename an active interface. You need to bring it down before renaming it. 
// This is identical to running: ip link set dev ${oldName} name ${newName} func NetworkChangeName(iface *net.Interface, newName string) error { if len(newName) >= IFNAMSIZ { return fmt.Errorf("Interface name %s too long", newName) } s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) msg.Index = int32(iface.Index) msg.Change = DEFAULT_CHANGE wb.AddData(msg) nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(newName)) wb.AddData(nameData) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Add a new VETH pair link on the host // This is identical to running: ip link add name $name type veth peer name $peername func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_UNSPEC) wb.AddData(msg) nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1)) wb.AddData(nameData) txqLen := make([]byte, 4) native.PutUint32(txqLen, uint32(txQueueLen)) txqData := newRtAttr(syscall.IFLA_TXQLEN, txqLen) wb.AddData(txqData) nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil) newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth")) nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil) newIfInfomsgChild(nest3, syscall.AF_UNSPEC) newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2)) txqLen2 := make([]byte, 4) native.PutUint32(txqLen2, uint32(txQueueLen)) newRtAttrChild(nest3, syscall.IFLA_TXQLEN, txqLen2) wb.AddData(nest1) if err := s.Send(wb); err != nil { return err } if err := s.HandleAck(wb.Seq); err != nil { if os.IsExist(err) { return ErrInterfaceExists } return err } return nil } // Add a new VLAN interface with masterDev as its upper 
device // This is identical to running: // ip link add name $name link $masterdev type vlan id $id func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) masterDevIfc, err := net.InterfaceByName(masterDev) if err != nil { return err } msg := newIfInfomsg(syscall.AF_UNSPEC) wb.AddData(msg) nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil) newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("vlan")) nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) vlanData := make([]byte, 2) native.PutUint16(vlanData, vlanId) newRtAttrChild(nest2, IFLA_VLAN_ID, vlanData) wb.AddData(nest1) wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index))) wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(vlanDev))) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // MacVlan link has LowerDev, UpperDev and operates in Mode mode // This simplifies the code when creating MacVlan or MacVtap interface type MacVlanLink struct { MasterDev string SlaveDev string mode string } func (m MacVlanLink) Mode() uint32 { modeMap := map[string]uint32{ "private": MACVLAN_MODE_PRIVATE, "vepa": MACVLAN_MODE_VEPA, "bridge": MACVLAN_MODE_BRIDGE, "passthru": MACVLAN_MODE_PASSTHRU, } return modeMap[m.mode] } // Add MAC VLAN network interface with masterDev as its upper device // This is identical to running: // ip link add name $name link $masterdev type macvlan mode $mode func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev) if err != nil { return err } msg := newIfInfomsg(syscall.AF_UNSPEC) wb.AddData(msg) nest1 := 
newRtAttr(syscall.IFLA_LINKINFO, nil) newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type)) nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) macVlanData := make([]byte, 4) native.PutUint32(macVlanData, mcvln.Mode()) newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData) wb.AddData(nest1) wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index))) wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev))) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error { return networkLinkMacVlan("macvlan", &MacVlanLink{ MasterDev: masterDev, SlaveDev: macVlanDev, mode: mode, }) } func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error { return networkLinkMacVlan("macvtap", &MacVlanLink{ MasterDev: masterDev, SlaveDev: macVlanDev, mode: mode, }) } func networkLinkIpAction(action, flags int, ifa IfAddr) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() family := getIpFamily(ifa.IP) wb := newNetlinkRequest(action, flags) msg := newIfAddrmsg(family) msg.Index = uint32(ifa.Iface.Index) prefixLen, _ := ifa.IPNet.Mask.Size() msg.Prefixlen = uint8(prefixLen) wb.AddData(msg) var ipData []byte if family == syscall.AF_INET { ipData = ifa.IP.To4() } else { ipData = ifa.IP.To16() } localData := newRtAttr(syscall.IFA_LOCAL, ipData) wb.AddData(localData) addrData := newRtAttr(syscall.IFA_ADDRESS, ipData) wb.AddData(addrData) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Delete an IP address from an interface. This is identical to: // ip addr del $ip/$ipNet dev $iface func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { return networkLinkIpAction( syscall.RTM_DELADDR, syscall.NLM_F_ACK, IfAddr{iface, ip, ipNet}, ) } // Add an Ip address to an interface. 
This is identical to: // ip addr add $ip/$ipNet dev $iface func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { return networkLinkIpAction( syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK, IfAddr{iface, ip, ipNet}, ) } // Returns an array of IPNet for all the currently routed subnets on ipv4 // This is similar to the first column of "ip route" output func NetworkGetRoutes() ([]Route, error) { s, err := getNetlinkSocket() if err != nil { return nil, err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP) msg := newIfInfomsg(syscall.AF_UNSPEC) wb.AddData(msg) if err := s.Send(wb); err != nil { return nil, err } pid, err := s.GetPid() if err != nil { return nil, err } res := make([]Route, 0) outer: for { msgs, err := s.Receive() if err != nil { return nil, err } for _, m := range msgs { if err := s.CheckMessage(m, wb.Seq, pid); err != nil { if err == io.EOF { break outer } return nil, err } if m.Header.Type != syscall.RTM_NEWROUTE { continue } var r Route msg := (*RtMsg)(unsafe.Pointer(&m.Data[0:syscall.SizeofRtMsg][0])) if msg.Flags&syscall.RTM_F_CLONED != 0 { // Ignore cloned routes continue } if msg.Table != syscall.RT_TABLE_MAIN { // Ignore non-main tables continue } if msg.Family != syscall.AF_INET { // Ignore non-ipv4 routes continue } if msg.Dst_len == 0 { // Default routes r.Default = true } attrs, err := syscall.ParseNetlinkRouteAttr(&m) if err != nil { return nil, err } for _, attr := range attrs { switch attr.Attr.Type { case syscall.RTA_DST: ip := attr.Value r.IPNet = &net.IPNet{ IP: ip, Mask: net.CIDRMask(int(msg.Dst_len), 8*len(ip)), } case syscall.RTA_OIF: index := int(native.Uint32(attr.Value[0:4])) r.Iface, _ = net.InterfaceByIndex(index) } } if r.Default || r.IPNet != nil { res = append(res, r) } } } return res, nil } // Add a new route table entry. 
func AddRoute(destination, source, gateway, device string) error { if destination == "" && source == "" && gateway == "" { return fmt.Errorf("one of destination, source or gateway must not be blank") } s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) msg := newRtMsg() currentFamily := -1 var rtAttrs []*RtAttr if destination != "" { destIP, destNet, err := net.ParseCIDR(destination) if err != nil { return fmt.Errorf("destination CIDR %s couldn't be parsed", destination) } destFamily := getIpFamily(destIP) currentFamily = destFamily destLen, bits := destNet.Mask.Size() if destLen == 0 && bits == 0 { return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination) } msg.Family = uint8(destFamily) msg.Dst_len = uint8(destLen) var destData []byte if destFamily == syscall.AF_INET { destData = destIP.To4() } else { destData = destIP.To16() } rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData)) } if source != "" { srcIP := net.ParseIP(source) if srcIP == nil { return fmt.Errorf("source IP %s couldn't be parsed", source) } srcFamily := getIpFamily(srcIP) if currentFamily != -1 && currentFamily != srcFamily { return fmt.Errorf("source and destination ip were not the same IP family") } currentFamily = srcFamily msg.Family = uint8(srcFamily) var srcData []byte if srcFamily == syscall.AF_INET { srcData = srcIP.To4() } else { srcData = srcIP.To16() } rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData)) } if gateway != "" { gwIP := net.ParseIP(gateway) if gwIP == nil { return fmt.Errorf("gateway IP %s couldn't be parsed", gateway) } gwFamily := getIpFamily(gwIP) if currentFamily != -1 && currentFamily != gwFamily { return fmt.Errorf("gateway, source, and destination ip were not the same IP family") } msg.Family = uint8(gwFamily) var gwData []byte if gwFamily == syscall.AF_INET { gwData = gwIP.To4() } else { 
gwData = gwIP.To16() } rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData)) } wb.AddData(msg) for _, attr := range rtAttrs { wb.AddData(attr) } iface, err := net.InterfaceByName(device) if err != nil { return err } wb.AddData(uint32Attr(syscall.RTA_OIF, uint32(iface.Index))) if err := s.Send(wb); err != nil { return err } return s.HandleAck(wb.Seq) } // Add a new default gateway. Identical to: // ip route add default via $ip func AddDefaultGw(ip, device string) error { return AddRoute("", "", ip, device) } // THIS CODE DOES NOT COMMUNICATE WITH KERNEL VIA RTNETLINK INTERFACE // IT IS HERE FOR BACKWARDS COMPATIBILITY WITH OLDER LINUX KERNELS // WHICH SHIP WITH OLDER NOT ENTIRELY FUNCTIONAL VERSION OF NETLINK func getIfSocket() (fd int, err error) { for _, socket := range []int{ syscall.AF_INET, syscall.AF_PACKET, syscall.AF_INET6, } { if fd, err = syscall.Socket(socket, syscall.SOCK_DGRAM, 0); err == nil { break } } if err == nil { return fd, nil } return -1, err } // Create the actual bridge device. This is more backward-compatible than // netlink.NetworkLinkAdd and works on RHEL 6. func CreateBridge(name string, setMacAddr bool) error { if len(name) >= IFNAMSIZ { return fmt.Errorf("Interface name %s too long", name) } s, err := getIfSocket() if err != nil { return err } defer syscall.Close(s) nameBytePtr, err := syscall.BytePtrFromString(name) if err != nil { return err } if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 { return err } if setMacAddr { return SetMacAddress(name, randMacAddr()) } return nil } // Delete the actual bridge device. 
func DeleteBridge(name string) error { s, err := getIfSocket() if err != nil { return err } defer syscall.Close(s) nameBytePtr, err := syscall.BytePtrFromString(name) if err != nil { return err } var ifr ifreqFlags copy(ifr.IfrnName[:len(ifr.IfrnName)-1], []byte(name)) if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), syscall.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifr))); err != 0 { return err } if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRDELBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 { return err } return nil } // Add a slave to abridge device. This is more backward-compatible than // netlink.NetworkSetMaster and works on RHEL 6. func AddToBridge(iface, master *net.Interface) error { if len(master.Name) >= IFNAMSIZ { return fmt.Errorf("Interface name %s too long", master.Name) } s, err := getIfSocket() if err != nil { return err } defer syscall.Close(s) ifr := ifreqIndex{} copy(ifr.IfrnName[:len(ifr.IfrnName)-1], master.Name) ifr.IfruIndex = int32(iface.Index) if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDIF, uintptr(unsafe.Pointer(&ifr))); err != 0 { return err } return nil } func randMacAddr() string { hw := make(net.HardwareAddr, 6) for i := 0; i < 6; i++ { hw[i] = byte(rand.Intn(255)) } hw[0] &^= 0x1 // clear multicast bit hw[0] |= 0x2 // set local assignment bit (IEEE802) return hw.String() } func SetMacAddress(name, addr string) error { if len(name) >= IFNAMSIZ { return fmt.Errorf("Interface name %s too long", name) } hw, err := net.ParseMAC(addr) if err != nil { return err } s, err := getIfSocket() if err != nil { return err } defer syscall.Close(s) ifr := ifreqHwaddr{} ifr.IfruHwaddr.Family = syscall.ARPHRD_ETHER copy(ifr.IfrnName[:len(ifr.IfrnName)-1], name) for i := 0; i < 6; i++ { ifr.IfruHwaddr.Data[i] = ifrDataByte(hw[i]) } if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), syscall.SIOCSIFHWADDR, uintptr(unsafe.Pointer(&ifr))); err != 0 { return err } return nil } 
func SetHairpinMode(iface *net.Interface, enabled bool) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() req := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := newIfInfomsg(syscall.AF_BRIDGE) msg.Type = syscall.RTM_SETLINK msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(iface.Index) msg.Change = DEFAULT_CHANGE req.AddData(msg) mode := []byte{0} if enabled { mode[0] = byte(1) } br := newRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil) newRtAttrChild(br, IFLA_BRPORT_MODE, mode) req.AddData(br) if err := s.Send(req); err != nil { return err } return s.HandleAck(req.Seq) } func ChangeName(iface *net.Interface, newName string) error { if len(newName) >= IFNAMSIZ { return fmt.Errorf("Interface name %s too long", newName) } fd, err := getIfSocket() if err != nil { return err } defer syscall.Close(fd) data := [IFNAMSIZ * 2]byte{} // the "-1"s here are very important for ensuring we get proper null // termination of our new C strings copy(data[:IFNAMSIZ-1], iface.Name) copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName) if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 { return errno } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/netlink/netlink_linux_arm.go0000644000175000017500000000010512524212370025667 0ustar tianontianonpackage netlink func ifrDataByte(b byte) uint8 { return uint8(b) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/netlink/netlink_linux_test.go0000644000175000017500000002126112524212370026075 0ustar tianontianonpackage netlink import ( "net" "strings" "syscall" "testing" ) type testLink struct { name string linkType string } func addLink(t *testing.T, name string, linkType string) { if err := NetworkLinkAdd(name, linkType); err != nil { t.Fatalf("Unable to create %s link: %s", name, err) } } func readLink(t *testing.T, name string) *net.Interface { iface, err := net.InterfaceByName(name) if 
err != nil { t.Fatalf("Could not find %s interface: %s", name, err) } return iface } func deleteLink(t *testing.T, name string) { if err := NetworkLinkDel(name); err != nil { t.Fatalf("Unable to delete %s link: %s", name, err) } } func upLink(t *testing.T, name string) { iface := readLink(t, name) if err := NetworkLinkUp(iface); err != nil { t.Fatalf("Could not bring UP %#v interface: %s", iface, err) } } func downLink(t *testing.T, name string) { iface := readLink(t, name) if err := NetworkLinkDown(iface); err != nil { t.Fatalf("Could not bring DOWN %#v interface: %s", iface, err) } } func ipAssigned(iface *net.Interface, ip net.IP) bool { addrs, _ := iface.Addrs() for _, addr := range addrs { args := strings.SplitN(addr.String(), "/", 2) if args[0] == ip.String() { return true } } return false } func TestNetworkLinkAddDel(t *testing.T) { if testing.Short() { return } testLinks := []testLink{ {"tstEth", "dummy"}, {"tstBr", "bridge"}, } for _, tl := range testLinks { addLink(t, tl.name, tl.linkType) defer deleteLink(t, tl.name) readLink(t, tl.name) } } func TestNetworkLinkUpDown(t *testing.T) { if testing.Short() { return } tl := testLink{name: "tstEth", linkType: "dummy"} addLink(t, tl.name, tl.linkType) defer deleteLink(t, tl.name) upLink(t, tl.name) ifcAfterUp := readLink(t, tl.name) if (ifcAfterUp.Flags & syscall.IFF_UP) != syscall.IFF_UP { t.Fatalf("Could not bring UP %#v initerface", tl) } downLink(t, tl.name) ifcAfterDown := readLink(t, tl.name) if (ifcAfterDown.Flags & syscall.IFF_UP) == syscall.IFF_UP { t.Fatalf("Could not bring DOWN %#v initerface", tl) } } func TestNetworkSetMacAddress(t *testing.T) { if testing.Short() { return } tl := testLink{name: "tstEth", linkType: "dummy"} macaddr := "22:ce:e0:99:63:6f" addLink(t, tl.name, tl.linkType) defer deleteLink(t, tl.name) ifcBeforeSet := readLink(t, tl.name) if err := NetworkSetMacAddress(ifcBeforeSet, macaddr); err != nil { t.Fatalf("Could not set %s MAC address on %#v interface: %s", macaddr, tl, err) } 
ifcAfterSet := readLink(t, tl.name) if ifcAfterSet.HardwareAddr.String() != macaddr { t.Fatalf("Could not set %s MAC address on %#v interface", macaddr, tl) } } func TestNetworkSetMTU(t *testing.T) { if testing.Short() { return } tl := testLink{name: "tstEth", linkType: "dummy"} mtu := 1400 addLink(t, tl.name, tl.linkType) defer deleteLink(t, tl.name) ifcBeforeSet := readLink(t, tl.name) if err := NetworkSetMTU(ifcBeforeSet, mtu); err != nil { t.Fatalf("Could not set %d MTU on %#v interface: %s", mtu, tl, err) } ifcAfterSet := readLink(t, tl.name) if ifcAfterSet.MTU != mtu { t.Fatalf("Could not set %d MTU on %#v interface", mtu, tl) } } func TestNetworkSetMasterNoMaster(t *testing.T) { if testing.Short() { return } master := testLink{"tstBr", "bridge"} slave := testLink{"tstEth", "dummy"} testLinks := []testLink{master, slave} for _, tl := range testLinks { addLink(t, tl.name, tl.linkType) defer deleteLink(t, tl.name) upLink(t, tl.name) } masterIfc := readLink(t, master.name) slaveIfc := readLink(t, slave.name) if err := NetworkSetMaster(slaveIfc, masterIfc); err != nil { t.Fatalf("Could not set %#v to be the master of %#v: %s", master, slave, err) } // Trying to figure out a way to test which will not break on RHEL6. // We could check for existence of /sys/class/net/tstEth/upper_tstBr // which should point to the ../tstBr which is the UPPER device i.e. 
network bridge if err := NetworkSetNoMaster(slaveIfc); err != nil { t.Fatalf("Could not UNset %#v master of %#v: %s", master, slave, err) } } func TestNetworkChangeName(t *testing.T) { if testing.Short() { return } tl := testLink{"tstEth", "dummy"} newName := "newTst" addLink(t, tl.name, tl.linkType) linkIfc := readLink(t, tl.name) if err := NetworkChangeName(linkIfc, newName); err != nil { deleteLink(t, tl.name) t.Fatalf("Could not change %#v interface name to %s: %s", tl, newName, err) } readLink(t, newName) deleteLink(t, newName) } func TestNetworkLinkAddVlan(t *testing.T) { if testing.Short() { return } tl := struct { name string id uint16 }{ name: "tstVlan", id: 32, } masterLink := testLink{"tstEth", "dummy"} addLink(t, masterLink.name, masterLink.linkType) defer deleteLink(t, masterLink.name) if err := NetworkLinkAddVlan(masterLink.name, tl.name, tl.id); err != nil { t.Fatalf("Unable to create %#v VLAN interface: %s", tl, err) } readLink(t, tl.name) } func TestNetworkLinkAddMacVlan(t *testing.T) { if testing.Short() { return } tl := struct { name string mode string }{ name: "tstVlan", mode: "private", } masterLink := testLink{"tstEth", "dummy"} addLink(t, masterLink.name, masterLink.linkType) defer deleteLink(t, masterLink.name) if err := NetworkLinkAddMacVlan(masterLink.name, tl.name, tl.mode); err != nil { t.Fatalf("Unable to create %#v MAC VLAN interface: %s", tl, err) } readLink(t, tl.name) } func TestNetworkLinkAddMacVtap(t *testing.T) { if testing.Short() { return } tl := struct { name string mode string }{ name: "tstVtap", mode: "private", } masterLink := testLink{"tstEth", "dummy"} addLink(t, masterLink.name, masterLink.linkType) defer deleteLink(t, masterLink.name) if err := NetworkLinkAddMacVtap(masterLink.name, tl.name, tl.mode); err != nil { t.Fatalf("Unable to create %#v MAC VTAP interface: %s", tl, err) } readLink(t, tl.name) } func TestAddDelNetworkIp(t *testing.T) { if testing.Short() { return } ifaceName := "lo" ip := net.ParseIP("127.0.1.1") 
mask := net.IPv4Mask(255, 255, 255, 255) ipNet := &net.IPNet{IP: ip, Mask: mask} iface, err := net.InterfaceByName(ifaceName) if err != nil { t.Skip("No 'lo' interface; skipping tests") } if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil { t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err) } if !ipAssigned(iface, ip) { t.Fatalf("Could not locate address '%s' in lo address list.", ip.String()) } if err := NetworkLinkDelIp(iface, ip, ipNet); err != nil { t.Fatalf("Could not delete IP address %s from interface %#v: %s", ip.String(), iface, err) } if ipAssigned(iface, ip) { t.Fatalf("Located address '%s' in lo address list after removal.", ip.String()) } } func TestAddRouteSourceSelection(t *testing.T) { tstIp := "127.1.1.1" tl := testLink{name: "tstEth", linkType: "dummy"} addLink(t, tl.name, tl.linkType) defer deleteLink(t, tl.name) ip := net.ParseIP(tstIp) mask := net.IPv4Mask(255, 255, 255, 255) ipNet := &net.IPNet{IP: ip, Mask: mask} iface, err := net.InterfaceByName(tl.name) if err != nil { t.Fatalf("Lost created link %#v", tl) } if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil { t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err) } upLink(t, tl.name) defer downLink(t, tl.name) if err := AddRoute("127.0.0.0/8", tstIp, "", tl.name); err != nil { t.Fatalf("Failed to add route with source address") } } func TestCreateVethPair(t *testing.T) { if testing.Short() { return } var ( name1 = "veth1" name2 = "veth2" ) if err := NetworkCreateVethPair(name1, name2, 0); err != nil { t.Fatalf("Could not create veth pair %s %s: %s", name1, name2, err) } defer NetworkLinkDel(name1) readLink(t, name1) readLink(t, name2) } // // netlink package tests which do not use RTNETLINK // func TestCreateBridgeWithMac(t *testing.T) { if testing.Short() { return } name := "testbridge" if err := CreateBridge(name, true); err != nil { t.Fatal(err) } if _, err := net.InterfaceByName(name); err != nil { 
t.Fatal(err) } // cleanup and tests if err := DeleteBridge(name); err != nil { t.Fatal(err) } if _, err := net.InterfaceByName(name); err == nil { t.Fatalf("expected error getting interface because %s bridge was deleted", name) } } func TestSetMacAddress(t *testing.T) { if testing.Short() { return } name := "testmac" mac := randMacAddr() if err := NetworkLinkAdd(name, "bridge"); err != nil { t.Fatal(err) } defer NetworkLinkDel(name) if err := SetMacAddress(name, mac); err != nil { t.Fatal(err) } iface, err := net.InterfaceByName(name) if err != nil { t.Fatal(err) } if iface.HardwareAddr.String() != mac { t.Fatalf("mac address %q does not match %q", iface.HardwareAddr, mac) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/netlink/netlink_unsupported.go0000644000175000017500000000340412524212370026266 0ustar tianontianon// +build !linux package netlink import ( "errors" "net" ) var ( ErrNotImplemented = errors.New("not implemented") ) func NetworkGetRoutes() ([]Route, error) { return nil, ErrNotImplemented } func NetworkLinkAdd(name string, linkType string) error { return ErrNotImplemented } func NetworkLinkDel(name string) error { return ErrNotImplemented } func NetworkLinkUp(iface *net.Interface) error { return ErrNotImplemented } func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { return ErrNotImplemented } func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { return ErrNotImplemented } func AddRoute(destination, source, gateway, device string) error { return ErrNotImplemented } func AddDefaultGw(ip, device string) error { return ErrNotImplemented } func NetworkSetMTU(iface *net.Interface, mtu int) error { return ErrNotImplemented } func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error { return ErrNotImplemented } func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error { return ErrNotImplemented } func NetworkChangeName(iface *net.Interface, newName string) error { 
return ErrNotImplemented } func NetworkSetNsFd(iface *net.Interface, fd int) error { return ErrNotImplemented } func NetworkSetNsPid(iface *net.Interface, nspid int) error { return ErrNotImplemented } func NetworkSetMaster(iface, master *net.Interface) error { return ErrNotImplemented } func NetworkLinkDown(iface *net.Interface) error { return ErrNotImplemented } func CreateBridge(name string, setMacAddr bool) error { return ErrNotImplemented } func DeleteBridge(name string) error { return ErrNotImplemented } func AddToBridge(iface, master *net.Interface) error { return ErrNotImplemented } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/container.go0000644000175000017500000001057712524212370022501 0ustar tianontianon// Libcontainer provides a native Go implementation for creating containers // with namespaces, cgroups, capabilities, and filesystem access controls. // It allows you to manage the lifecycle of the container performing additional operations // after the container is created. package libcontainer import ( "github.com/docker/libcontainer/configs" ) // The status of a container. type Status int const ( // The container exists and is running. Running Status = iota + 1 // The container exists, it is in the process of being paused. Pausing // The container exists, but all its processes are paused. Paused // The container does not exist. Destroyed ) // State represents a running container's state type State struct { // ID is the container ID. ID string `json:"id"` // InitProcessPid is the init process id in the parent namespace. InitProcessPid int `json:"init_process_pid"` // InitProcessStartTime is the init process start time. InitProcessStartTime string `json:"init_process_start"` // Path to all the cgroups setup for a container. Key is cgroup subsystem name // with the value as the path. CgroupPaths map[string]string `json:"cgroup_paths"` // NamespacePaths are filepaths to the container's namespaces. 
Key is the namespace type // with the value as the path. NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"` // Config is the container's configuration. Config configs.Config `json:"config"` } // A libcontainer container object. // // Each container is thread-safe within the same process. Since a container can // be destroyed by a separate process, any function may return that the container // was not found. type Container interface { // Returns the ID of the container ID() string // Returns the current status of the container. // // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. Status() (Status, error) // State returns the current container's state information. // // errors: // Systemerror - System erroor. State() (*State, error) // Returns the current config of the container. Config() configs.Config // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. // // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. // // Some of the returned PIDs may no longer refer to processes in the Container, unless // the Container state is PAUSED in which case every PID in the slice is valid. Processes() ([]int, error) // Returns statistics for the container. // // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. Stats() (*Stats, error) // Set cgroup resources of container as configured // // We can use this to change resources when containers are running. // // errors: // Systemerror - System error. Set(config configs.Config) error // Start a process inside the container. Returns error if process fails to // start. You can track process lifecycle with passed Process structure. // // errors: // ContainerDestroyed - Container no longer exists, // ConfigInvalid - config is invalid, // ContainerPaused - Container is paused, // Systemerror - System error. 
Start(process *Process) (err error) // Destroys the container after killing all running processes. // // Any event registrations are removed before the container is destroyed. // No error is returned if the container is already destroyed. // // errors: // Systemerror - System error. Destroy() error // If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses // the execution of any user processes. Asynchronously, when the container finished being paused the // state is changed to PAUSED. // If the Container state is PAUSED, do nothing. // // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. Pause() error // If the Container state is PAUSED, resumes the execution of any user processes in the // Container before setting the Container state to RUNNING. // If the Container state is RUNNING, do nothing. // // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. Resume() error // NotifyOOM returns a read-only channel signaling when the container receives an OOM notification. // // errors: // Systemerror - System error. 
NotifyOOM() (<-chan struct{}, error) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/container_linux.go0000644000175000017500000001760612524212370023720 0ustar tianontianon// +build linux package libcontainer import ( "encoding/json" "fmt" "os" "os/exec" "path/filepath" "sync" "syscall" log "github.com/Sirupsen/logrus" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) type linuxContainer struct { id string root string config *configs.Config cgroupManager cgroups.Manager initPath string initArgs []string initProcess parentProcess m sync.Mutex } // ID returns the container's unique ID func (c *linuxContainer) ID() string { return c.id } // Config returns the container's configuration func (c *linuxContainer) Config() configs.Config { return *c.config } func (c *linuxContainer) Status() (Status, error) { c.m.Lock() defer c.m.Unlock() return c.currentStatus() } func (c *linuxContainer) State() (*State, error) { c.m.Lock() defer c.m.Unlock() return c.currentState() } func (c *linuxContainer) Processes() ([]int, error) { pids, err := c.cgroupManager.GetPids() if err != nil { return nil, newSystemError(err) } return pids, nil } func (c *linuxContainer) Stats() (*Stats, error) { var ( err error stats = &Stats{} ) if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newSystemError(err) } for _, iface := range c.config.Networks { switch iface.Type { case "veth": istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) if err != nil { return stats, newSystemError(err) } stats.Interfaces = append(stats.Interfaces, istats) } } return stats, nil } func (c *linuxContainer) Set(config configs.Config) error { c.m.Lock() defer c.m.Unlock() c.config = &config return c.cgroupManager.Set(c.config) } func (c *linuxContainer) Start(process *Process) error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() if err != nil { return err } doInit := status == Destroyed parent, err := 
c.newParentProcess(process, doInit) if err != nil { return newSystemError(err) } if err := parent.start(); err != nil { // terminate the process to ensure that it properly is reaped. if err := parent.terminate(); err != nil { log.Warn(err) } return newSystemError(err) } process.ops = parent if doInit { c.updateState(parent) } return nil } func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) { parentPipe, childPipe, err := newPipe() if err != nil { return nil, newSystemError(err) } cmd, err := c.commandTemplate(p, childPipe) if err != nil { return nil, newSystemError(err) } if !doInit { return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil } return c.newInitProcess(p, cmd, parentPipe, childPipe) } func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) { cmd := &exec.Cmd{ Path: c.initPath, Args: c.initArgs, } cmd.Stdin = p.Stdin cmd.Stdout = p.Stdout cmd.Stderr = p.Stderr cmd.Dir = c.config.Rootfs if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} } cmd.ExtraFiles = []*os.File{childPipe} // NOTE: when running a container with no PID namespace and the parent process spawning the container is // PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason // even with the parent still running. if c.config.ParentDeathSignal > 0 { cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal) } return cmd, nil } func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) { t := "_LIBCONTAINER_INITTYPE=standard" cloneFlags := c.config.Namespaces.CloneFlags() if cloneFlags&syscall.CLONE_NEWUSER != 0 { if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil { // user mappings are not supported return nil, err } // Default to root user when user namespaces are enabled. 
if cmd.SysProcAttr.Credential == nil { cmd.SysProcAttr.Credential = &syscall.Credential{} } } cmd.Env = append(cmd.Env, t) cmd.SysProcAttr.Cloneflags = cloneFlags return &initProcess{ cmd: cmd, childPipe: childPipe, parentPipe: parentPipe, manager: c.cgroupManager, config: c.newInitConfig(p), }, nil } func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess { cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()), "_LIBCONTAINER_INITTYPE=setns", ) if p.consolePath != "" { cmd.Env = append(cmd.Env, "_LIBCONTAINER_CONSOLE_PATH="+p.consolePath) } // TODO: set on container for process management return &setnsProcess{ cmd: cmd, cgroupPaths: c.cgroupManager.GetPaths(), childPipe: childPipe, parentPipe: parentPipe, config: c.newInitConfig(p), } } func (c *linuxContainer) newInitConfig(process *Process) *initConfig { return &initConfig{ Config: c.config, Args: process.Args, Env: process.Env, User: process.User, Cwd: process.Cwd, Console: process.consolePath, Capabilities: process.Capabilities, } } func newPipe() (parent *os.File, child *os.File, err error) { fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) if err != nil { return nil, nil, err } return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil } func (c *linuxContainer) Destroy() error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() if err != nil { return err } if status != Destroyed { return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped) } if !c.config.Namespaces.Contains(configs.NEWPID) { if err := killCgroupProcesses(c.cgroupManager); err != nil { log.Warn(err) } } err = c.cgroupManager.Destroy() if rerr := os.RemoveAll(c.root); err == nil { err = rerr } c.initProcess = nil return err } func (c *linuxContainer) Pause() error { c.m.Lock() defer c.m.Unlock() return c.cgroupManager.Freeze(configs.Frozen) } 
// Resume asks the cgroup manager to thaw the container.
func (c *linuxContainer) Resume() error {
	c.m.Lock()
	defer c.m.Unlock()
	return c.cgroupManager.Freeze(configs.Thawed)
}

// NotifyOOM forwards to notifyOnOOM using the cgroup paths reported by the
// cgroup manager.
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
	return notifyOnOOM(c.cgroupManager.GetPaths())
}

// updateState records process as the container's init process and writes the
// resulting state as JSON to the state file inside the container's root
// directory.
func (c *linuxContainer) updateState(process parentProcess) error {
	c.initProcess = process
	state, err := c.currentState()
	if err != nil {
		return err
	}
	f, err := os.Create(filepath.Join(c.root, stateFilename))
	if err != nil {
		return err
	}
	defer f.Close()
	return json.NewEncoder(f).Encode(state)
}

// currentStatus derives the status without taking the lock: Destroyed when
// there is no init process or signal 0 reports ESRCH, Paused when the
// configured freezer state is Frozen, Running otherwise.
func (c *linuxContainer) currentStatus() (Status, error) {
	if c.initProcess == nil {
		return Destroyed, nil
	}
	// return Running if the init process is alive
	if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
		if err == syscall.ESRCH {
			return Destroyed, nil
		}
		return 0, newSystemError(err)
	}
	if c.config.Cgroups != nil && c.config.Cgroups.Freezer == configs.Frozen {
		return Paused, nil
	}
	return Running, nil
}

// currentState builds the State snapshot for a live container. It returns a
// ContainerNotExists error when the status is Destroyed.
func (c *linuxContainer) currentState() (*State, error) {
	status, err := c.currentStatus()
	if err != nil {
		return nil, err
	}
	if status == Destroyed {
		return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists)
	}
	startTime, err := c.initProcess.startTime()
	if err != nil {
		return nil, newSystemError(err)
	}
	state := &State{
		ID:                   c.ID(),
		Config:               *c.config,
		InitProcessPid:       c.initProcess.pid(),
		InitProcessStartTime: startTime,
		CgroupPaths:          c.cgroupManager.GetPaths(),
		NamespacePaths:       make(map[configs.NamespaceType]string),
	}
	// Paths for the namespaces listed in the configuration come first ...
	for _, ns := range c.config.Namespaces {
		state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())
	}
	// ... then every remaining known namespace type is filled in as well.
	for _, nsType := range configs.NamespaceTypes() {
		if _, ok := state.NamespacePaths[nsType]; !ok {
			ns := configs.Namespace{Type: nsType}
			state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())
		}
	}
	return state, nil
}
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/update-vendor.sh0000755000175000017500000000170312524212370023273 0ustar tianontianon#!/usr/bin/env bash
set -e

cd "$(dirname "$BASH_SOURCE")"

# Downloads dependencies into vendor/ directory
mkdir -p vendor
cd vendor

# clone VCS PKG REV: fetch PKG at revision REV into src/PKG using git or hg,
# replacing any previous copy, then strip the VCS metadata directories.
clone() {
	vcs=$1
	pkg=$2
	rev=$3

	pkg_url=https://$pkg
	target_dir=src/$pkg

	echo -n "$pkg @ $rev: "

	if [ -d $target_dir ]; then
		echo -n 'rm old, '
		rm -fr $target_dir
	fi

	echo -n 'clone, '
	case $vcs in
		git)
			git clone --quiet --no-checkout $pkg_url $target_dir
			( cd $target_dir && git reset --quiet --hard $rev )
			;;
		hg)
			hg clone --quiet --updaterev $rev $pkg_url $target_dir
			;;
	esac

	echo -n 'rm VCS, '
	( cd $target_dir && rm -rf .{git,hg} )

	echo done
}

# the following lines are in sorted order, FYI
clone git github.com/codegangsta/cli 1.1.0
clone git github.com/coreos/go-systemd v2
clone git github.com/godbus/dbus v2
clone git github.com/Sirupsen/logrus v0.6.6
clone git github.com/syndtr/gocapability 8e4cdcb

# intentionally not vendoring Docker itself... that'd be a circle :)
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/init_linux.go0000644000175000017500000001461712524212370022700 0ustar tianontianon// +build linux

package libcontainer

import (
	"encoding/json"
	"fmt"
	"os"
	"strings"
	"syscall"

	log "github.com/Sirupsen/logrus"
	"github.com/docker/libcontainer/cgroups"
	"github.com/docker/libcontainer/configs"
	"github.com/docker/libcontainer/netlink"
	"github.com/docker/libcontainer/system"
	"github.com/docker/libcontainer/user"
	"github.com/docker/libcontainer/utils"
)

// initType selects which initer implementation newContainerInit returns.
type initType string

const (
	initSetns    initType = "setns"
	initStandard initType = "standard"
)

// pid is the JSON wrapper for a single process id.
type pid struct {
	Pid int `json:"pid"`
}

// network is an internal struct used to setup container networks.
type network struct {
	configs.Network

	// TempVethPeerName is a unique temporary veth peer name that was placed into
	// the container's namespace.
	TempVethPeerName string `json:"temp_veth_peer_name"`
}

// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
	Args         []string        `json:"args"`
	Env          []string        `json:"env"`
	Cwd          string          `json:"cwd"`
	Capabilities []string        `json:"capabilities"`
	User         string          `json:"user"`
	Config       *configs.Config `json:"config"`
	Console      string          `json:"console"`
	Networks     []*network      `json:"network"`
}

// initer is implemented by the per-type init routines.
type initer interface {
	Init() error
}

// newContainerInit decodes the JSON initConfig from pipe, loads its
// environment into the current process and returns the initer matching t.
// An unknown t yields an error.
func newContainerInit(t initType, pipe *os.File) (initer, error) {
	var config *initConfig
	if err := json.NewDecoder(pipe).Decode(&config); err != nil {
		return nil, err
	}
	if err := populateProcessEnvironment(config.Env); err != nil {
		return nil, err
	}
	switch t {
	case initSetns:
		return &linuxSetnsInit{
			config: config,
		}, nil
	case initStandard:
		return &linuxStandardInit{
			parentPid: syscall.Getppid(),
			config:    config,
		}, nil
	}
	return nil, fmt.Errorf("unknown init type %q", t)
}

// populateProcessEnvironment loads the provided environment variables into the
// current processes's environment.
func populateProcessEnvironment(env []string) error {
	for _, pair := range env {
		// Split on the first "=" only, so values may themselves contain "=".
		p := strings.SplitN(pair, "=", 2)
		if len(p) < 2 {
			return fmt.Errorf("invalid environment '%v'", pair)
		}
		if err := os.Setenv(p[0], p[1]); err != nil {
			return err
		}
	}
	return nil
}

// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace
func finalizeNamespace(config *initConfig) error {
	// Ensure that all non-standard fds we may have accidentally
	// inherited are marked close-on-exec so they stay out of the
	// container
	if err := utils.CloseExecFrom(3); err != nil {
		return err
	}

	// Per-process capabilities, when set, override the container-wide list.
	capabilities := config.Config.Capabilities
	if config.Capabilities != nil {
		capabilities = config.Capabilities
	}
	w, err := newCapWhitelist(capabilities)
	if err != nil {
		return err
	}
	// drop capabilities in bounding set before changing user
	if err := w.dropBoundingSet(); err != nil {
		return err
	}
	// preserve existing capabilities while we change users
	if err := system.SetKeepCaps(); err != nil {
		return err
	}
	if err := setupUser(config); err != nil {
		return err
	}
	if err := system.ClearKeepCaps(); err != nil {
		return err
	}
	// drop all other capabilities
	if err := w.drop(); err != nil {
		return err
	}
	if config.Cwd != "" {
		if err := syscall.Chdir(config.Cwd); err != nil {
			return err
		}
	}
	return nil
}

// joinExistingNamespaces gets all the namespace paths specified for the container and
// does a setns on the namespace fd so that the current process joins the namespace.
func joinExistingNamespaces(namespaces []configs.Namespace) error {
	for _, ns := range namespaces {
		// Namespaces without a path are skipped.
		if ns.Path != "" {
			f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
			if err != nil {
				return err
			}
			err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
			// Close the namespace file before looking at the setns result.
			f.Close()
			if err != nil {
				return err
			}
		}
	}
	return nil
}

// setupUser changes the groups, gid, and uid for the user inside the container
func setupUser(config *initConfig) error {
	// Set up defaults.
	defaultExecUser := user.ExecUser{
		Uid:  syscall.Getuid(),
		Gid:  syscall.Getgid(),
		Home: "/",
	}
	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return err
	}
	groupPath, err := user.GetGroupPath()
	if err != nil {
		return err
	}
	execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
	if err != nil {
		return err
	}
	// Supplementary groups: those resolved for the user plus the configured
	// additional groups.
	suppGroups := append(execUser.Sgids, config.Config.AdditionalGroups...)
	if err := syscall.Setgroups(suppGroups); err != nil {
		return err
	}
	// Groups and gid are changed before the uid.
	if err := system.Setgid(execUser.Gid); err != nil {
		return err
	}
	if err := system.Setuid(execUser.Uid); err != nil {
		return err
	}
	// if we didn't get HOME already, set it based on the user's HOME
	if envHome := os.Getenv("HOME"); envHome == "" {
		if err := os.Setenv("HOME", execUser.Home); err != nil {
			return err
		}
	}
	return nil
}

// setupNetwork sets up and initializes any network interface inside the container.
func setupNetwork(config *initConfig) error {
	// NOTE: the loop variable shadows the config parameter inside the loop.
	for _, config := range config.Networks {
		strategy, err := getStrategy(config.Type)
		if err != nil {
			return err
		}
		if err := strategy.initialize(config); err != nil {
			return err
		}
	}
	return nil
}

// setupRoute adds every route listed in config.Routes through netlink and
// stops at the first failure.
func setupRoute(config *configs.Config) error {
	// NOTE: the loop variable shadows the config parameter inside the loop.
	for _, config := range config.Routes {
		if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
			return err
		}
	}
	return nil
}

// setupRlimits applies every resource limit listed in config.Rlimits to the
// current process and stops at the first failure.
func setupRlimits(config *configs.Config) error {
	for _, rlimit := range config.Rlimits {
		l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
		if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
			return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
		}
	}
	return nil
}

// killCgroupProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending a SIGKILL to each process then waiting for them to
// exit.
func killCgroupProcesses(m cgroups.Manager) error {
	var procs []*os.Process
	// Best effort: a failed freeze is only logged and the kill still proceeds.
	if err := m.Freeze(configs.Frozen); err != nil {
		log.Warn(err)
	}
	pids, err := m.GetPids()
	if err != nil {
		// Thaw again before bailing out; the thaw result is not checked here.
		m.Freeze(configs.Thawed)
		return err
	}
	for _, pid := range pids {
		if p, err := os.FindProcess(pid); err == nil {
			procs = append(procs, p)
			if err := p.Kill(); err != nil {
				log.Warn(err)
			}
		}
	}
	if err := m.Freeze(configs.Thawed); err != nil {
		log.Warn(err)
	}
	// Wait only after thawing; wait failures are logged, not returned.
	for _, p := range procs {
		if _, err := p.Wait(); err != nil {
			log.Warn(err)
		}
	}
	return nil
}
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/generic_error.go0000644000175000017500000000257412524212370023342 0ustar tianontianonpackage libcontainer

import (
	"fmt"
	"io"
	"text/template"
	"time"

	"github.com/docker/libcontainer/stacktrace"
)

// errorTemplate is the text/template used by genericError.Detail.
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} Code: {{.ECode}} {{if .Message }} Message: {{.Message}} {{end}} Frames:{{range $i, $frame := .Stack.Frames}} --- {{$i}}: {{$frame.Function}} Package: {{$frame.Package}} File: {{$frame.File}}@{{$frame.Line}}{{end}} `))

// newGenericError returns err unchanged when it already implements Error.
// Otherwise it wraps err in a genericError carrying code c, the current time
// and a captured stack trace. A nil err produces an error with an empty
// message.
func newGenericError(err error, c ErrorCode) Error {
	if le, ok := err.(Error); ok {
		return le
	}
	gerr := &genericError{
		Timestamp: time.Now(),
		Err:       err,
		ECode:     c,
		Stack:     stacktrace.Capture(1), // NOTE(review): presumably skips this constructor's frame - confirm
	}
	if err != nil {
		gerr.Message = err.Error()
	}
	return gerr
}

// newSystemError behaves like newGenericError with the code fixed to
// SystemError.
func newSystemError(err error) Error {
	if le, ok := err.(Error); ok {
		return le
	}
	gerr := &genericError{
		Timestamp: time.Now(),
		Err:       err,
		ECode:     SystemError,
		Stack:     stacktrace.Capture(1), // NOTE(review): presumably skips this constructor's frame - confirm
	}
	if err != nil {
		gerr.Message = err.Error()
	}
	return gerr
}

// genericError is the Error implementation produced by newGenericError and
// newSystemError. Err is excluded from JSON output.
type genericError struct {
	Timestamp time.Time
	ECode     ErrorCode
	Err       error `json:"-"`
	Message   string
	Stack     stacktrace.Stacktrace
}

// Error formats the error as "[<code as number>] <code>: <message>".
func (e *genericError) Error() string {
	return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Message)
}

// Code returns the error code.
func (e *genericError) Code() ErrorCode {
	return e.ECode
}

// Detail renders the error through errorTemplate into w.
func (e *genericError) Detail(w io.Writer) error {
	return errorTemplate.Execute(w, e)
}
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/generic_error_test.go0000644000175000017500000000036012524212370024370 0ustar tianontianonpackage libcontainer import ( "fmt" "io/ioutil" "testing" ) func TestErrorDetail(t *testing.T) { err := newGenericError(fmt.Errorf("test error"), SystemError) if derr := err.Detail(ioutil.Discard); derr != nil { t.Fatal(derr) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/factory_linux_test.go0000644000175000017500000001016112524212370024431 0ustar tianontianon// +build linux package libcontainer import ( "encoding/json" "io/ioutil" "os" "path/filepath" "testing" "github.com/docker/docker/pkg/mount" "github.com/docker/libcontainer/configs" ) func newTestRoot() (string, error) { dir, err := ioutil.TempDir("", "libcontainer") if err != nil { return "", err } return dir, nil } func TestFactoryNew(t *testing.T) { root, rerr := newTestRoot() if rerr != nil { t.Fatal(rerr) } defer os.RemoveAll(root) factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } if factory == nil { t.Fatal("factory should not be nil") } lfactory, ok := factory.(*LinuxFactory) if !ok { t.Fatal("expected linux factory returned on linux based systems") } if lfactory.Root != root { t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) } if factory.Type() != "libcontainer" { t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") } } func TestFactoryNewTmpfs(t *testing.T) { root, rerr := newTestRoot() if rerr != nil { t.Fatal(rerr) } defer os.RemoveAll(root) factory, err := New(root, Cgroupfs, TmpfsRoot) if err != nil { t.Fatal(err) } if factory == nil { t.Fatal("factory should not be nil") } lfactory, ok := factory.(*LinuxFactory) if !ok { t.Fatal("expected linux factory returned on linux based systems") } if lfactory.Root != root { t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) } if factory.Type() != "libcontainer" { t.Fatalf("unexpected factory 
type: %q, expected %q", factory.Type(), "libcontainer") } mounted, err := mount.Mounted(lfactory.Root) if err != nil { t.Fatal(err) } if !mounted { t.Fatalf("Factory Root is not mounted") } mounts, err := mount.GetMounts() if err != nil { t.Fatal(err) } var found bool for _, m := range mounts { if m.Mountpoint == lfactory.Root { if m.Fstype != "tmpfs" { t.Fatalf("Fstype of root: %s, expected %s", m.Fstype, "tmpfs") } if m.Source != "tmpfs" { t.Fatalf("Source of root: %s, expected %s", m.Source, "tmpfs") } found = true } } if !found { t.Fatalf("Factory Root is not listed in mounts list") } } func TestFactoryLoadNotExists(t *testing.T) { root, rerr := newTestRoot() if rerr != nil { t.Fatal(rerr) } defer os.RemoveAll(root) factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } _, err = factory.Load("nocontainer") if err == nil { t.Fatal("expected nil error loading non-existing container") } lerr, ok := err.(Error) if !ok { t.Fatal("expected libcontainer error type") } if lerr.Code() != ContainerNotExists { t.Fatalf("expected error code %s but received %s", ContainerNotExists, lerr.Code()) } } func TestFactoryLoadContainer(t *testing.T) { root, err := newTestRoot() if err != nil { t.Fatal(err) } defer os.RemoveAll(root) // setup default container config and state for mocking var ( id = "1" expectedConfig = &configs.Config{ Rootfs: "/mycontainer/root", } expectedState = &State{ InitProcessPid: 1024, Config: *expectedConfig, } ) if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil { t.Fatal(err) } if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil { t.Fatal(err) } factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } container, err := factory.Load(id) if err != nil { t.Fatal(err) } if container.ID() != id { t.Fatalf("expected container id %q but received %q", id, container.ID()) } config := container.Config() if config.Rootfs != expectedConfig.Rootfs { t.Fatalf("expected rootfs %q but received %q", 
expectedConfig.Rootfs, config.Rootfs) } lcontainer, ok := container.(*linuxContainer) if !ok { t.Fatal("expected linux container on linux based systems") } if lcontainer.initProcess.pid() != expectedState.InitProcessPid { t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, lcontainer.initProcess.pid()) } } func marshal(path string, v interface{}) error { f, err := os.Create(path) if err != nil { return err } defer f.Close() return json.NewEncoder(f).Encode(v) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/capabilities_linux.go0000644000175000017500000000562612524212370024366 0ustar tianontianon// +build linux package libcontainer import ( "fmt" "os" "github.com/syndtr/gocapability/capability" ) const allCapabilityTypes = capability.CAPS | capability.BOUNDS var capabilityList = map[string]capability.Cap{ "SETPCAP": capability.CAP_SETPCAP, "SYS_MODULE": capability.CAP_SYS_MODULE, "SYS_RAWIO": capability.CAP_SYS_RAWIO, "SYS_PACCT": capability.CAP_SYS_PACCT, "SYS_ADMIN": capability.CAP_SYS_ADMIN, "SYS_NICE": capability.CAP_SYS_NICE, "SYS_RESOURCE": capability.CAP_SYS_RESOURCE, "SYS_TIME": capability.CAP_SYS_TIME, "SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG, "MKNOD": capability.CAP_MKNOD, "AUDIT_WRITE": capability.CAP_AUDIT_WRITE, "AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL, "MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE, "MAC_ADMIN": capability.CAP_MAC_ADMIN, "NET_ADMIN": capability.CAP_NET_ADMIN, "SYSLOG": capability.CAP_SYSLOG, "CHOWN": capability.CAP_CHOWN, "NET_RAW": capability.CAP_NET_RAW, "DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE, "FOWNER": capability.CAP_FOWNER, "DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH, "FSETID": capability.CAP_FSETID, "KILL": capability.CAP_KILL, "SETGID": capability.CAP_SETGID, "SETUID": capability.CAP_SETUID, "LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE, "NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE, "NET_BROADCAST": capability.CAP_NET_BROADCAST, "IPC_LOCK": capability.CAP_IPC_LOCK, 
"IPC_OWNER": capability.CAP_IPC_OWNER, "SYS_CHROOT": capability.CAP_SYS_CHROOT, "SYS_PTRACE": capability.CAP_SYS_PTRACE, "SYS_BOOT": capability.CAP_SYS_BOOT, "LEASE": capability.CAP_LEASE, "SETFCAP": capability.CAP_SETFCAP, "WAKE_ALARM": capability.CAP_WAKE_ALARM, "BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND, "AUDIT_READ": capability.CAP_AUDIT_READ, } func newCapWhitelist(caps []string) (*whitelist, error) { l := []capability.Cap{} for _, c := range caps { v, ok := capabilityList[c] if !ok { return nil, fmt.Errorf("unknown capability %q", c) } l = append(l, v) } pid, err := capability.NewPid(os.Getpid()) if err != nil { return nil, err } return &whitelist{ keep: l, pid: pid, }, nil } type whitelist struct { pid capability.Capabilities keep []capability.Cap } // dropBoundingSet drops the capability bounding set to those specified in the whitelist. func (w *whitelist) dropBoundingSet() error { w.pid.Clear(capability.BOUNDS) w.pid.Set(capability.BOUNDS, w.keep...) return w.pid.Apply(capability.BOUNDS) } // drop drops all capabilities for the current process except those specified in the whitelist. func (w *whitelist) drop() error { w.pid.Clear(allCapabilityTypes) w.pid.Set(allCapabilityTypes, w.keep...) return w.pid.Apply(allCapabilityTypes) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/user/0000755000175000017500000000000012524212370021134 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/user/lookup.go0000644000175000017500000000573512524212370023006 0ustar tianontianonpackage user import ( "errors" "fmt" "syscall" ) var ( // The current operating system does not provide the required data for user lookups. ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data") ) func lookupUser(filter func(u User) bool) (User, error) { // Get operating system-specific passwd reader-closer. passwd, err := GetPasswd() if err != nil { return User{}, err } defer passwd.Close() // Get the users. 
users, err := ParsePasswdFilter(passwd, filter) if err != nil { return User{}, err } // No user entries found. if len(users) == 0 { return User{}, fmt.Errorf("no matching entries in passwd file") } // Assume the first entry is the "correct" one. return users[0], nil } // CurrentUser looks up the current user by their user id in /etc/passwd. If the // user cannot be found (or there is no /etc/passwd file on the filesystem), // then CurrentUser returns an error. func CurrentUser() (User, error) { return LookupUid(syscall.Getuid()) } // LookupUser looks up a user by their username in /etc/passwd. If the user // cannot be found (or there is no /etc/passwd file on the filesystem), then // LookupUser returns an error. func LookupUser(username string) (User, error) { return lookupUser(func(u User) bool { return u.Name == username }) } // LookupUid looks up a user by their user id in /etc/passwd. If the user cannot // be found (or there is no /etc/passwd file on the filesystem), then LookupId // returns an error. func LookupUid(uid int) (User, error) { return lookupUser(func(u User) bool { return u.Uid == uid }) } func lookupGroup(filter func(g Group) bool) (Group, error) { // Get operating system-specific group reader-closer. group, err := GetGroup() if err != nil { return Group{}, err } defer group.Close() // Get the users. groups, err := ParseGroupFilter(group, filter) if err != nil { return Group{}, err } // No user entries found. if len(groups) == 0 { return Group{}, fmt.Errorf("no matching entries in group file") } // Assume the first entry is the "correct" one. return groups[0], nil } // CurrentGroup looks up the current user's group by their primary group id's // entry in /etc/passwd. If the group cannot be found (or there is no // /etc/group file on the filesystem), then CurrentGroup returns an error. func CurrentGroup() (Group, error) { return LookupGid(syscall.Getgid()) } // LookupGroup looks up a group by its name in /etc/group. 
If the group cannot // be found (or there is no /etc/group file on the filesystem), then LookupGroup // returns an error. func LookupGroup(groupname string) (Group, error) { return lookupGroup(func(g Group) bool { return g.Name == groupname }) } // LookupGid looks up a group by its group id in /etc/group. If the group cannot // be found (or there is no /etc/group file on the filesystem), then LookupGid // returns an error. func LookupGid(gid int) (Group, error) { return lookupGroup(func(g Group) bool { return g.Gid == gid }) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/user/MAINTAINERS0000644000175000017500000000013012524212370022623 0ustar tianontianonTianon Gravi (@tianon) Aleksa Sarai (@cyphar) libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/user/lookup_unsupported.go0000644000175000017500000000057412524212370025452 0ustar tianontianon// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris package user import "io" func GetPasswdPath() (string, error) { return "", ErrUnsupported } func GetPasswd() (io.ReadCloser, error) { return nil, ErrUnsupported } func GetGroupPath() (string, error) { return "", ErrUnsupported } func GetGroup() (io.ReadCloser, error) { return nil, ErrUnsupported } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/user/user.go0000644000175000017500000001755312524212370022454 0ustar tianontianonpackage user import ( "bufio" "fmt" "io" "os" "strconv" "strings" ) const ( minId = 0 maxId = 1<<31 - 1 //for 32-bit systems compatibility ) var ( ErrRange = fmt.Errorf("Uids and gids must be in range %d-%d", minId, maxId) ) type User struct { Name string Pass string Uid int Gid int Gecos string Home string Shell string } type Group struct { Name string Pass string Gid int List []string } func parseLine(line string, v ...interface{}) { if line == "" { return } parts := strings.Split(line, ":") for i, p := range parts { if len(v) <= i { // if we have more "parts" than we have places to put them, bail for great 
"tolerance" of naughty configuration files break } switch e := v[i].(type) { case *string: // "root", "adm", "/bin/bash" *e = p case *int: // "0", "4", "1000" // ignore string to int conversion errors, for great "tolerance" of naughty configuration files *e, _ = strconv.Atoi(p) case *[]string: // "", "root", "root,adm,daemon" if p != "" { *e = strings.Split(p, ",") } else { *e = []string{} } default: // panic, because this is a programming/logic error, not a runtime one panic("parseLine expects only pointers! argument " + strconv.Itoa(i) + " is not a pointer!") } } }

// ParsePasswdFile opens the passwd-formatted file at path and returns every
// entry in it. The error from os.Open is returned unchanged when the file
// cannot be opened; the file is closed before returning.
func ParsePasswdFile(path string) ([]User, error) {
	passwd, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer passwd.Close()
	return ParsePasswd(passwd)
}

// ParsePasswd returns every entry read from the passwd-formatted reader. It
// is ParsePasswdFilter with no filter.
func ParsePasswd(passwd io.Reader) ([]User, error) {
	return ParsePasswdFilter(passwd, nil)
}

// ParsePasswdFileFilter opens the passwd-formatted file at path and returns
// the entries for which filter reports true (all entries when filter is nil).
func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) {
	passwd, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer passwd.Close()
	return ParsePasswdFilter(passwd, filter)
}

// ParsePasswdFilter reads passwd-formatted data from r line by line and
// returns the entries for which filter reports true. A nil filter accepts
// every entry. Lines that are empty after trimming whitespace are skipped.
//
// It returns an error when r is nil or when reading from r fails; in both
// cases the returned slice is nil. On success the slice is never nil.
func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
	if r == nil {
		return nil, fmt.Errorf("nil source for passwd-formatted data")
	}

	var (
		s   = bufio.NewScanner(r)
		out = []User{}
	)

	for s.Scan() {
		text := strings.TrimSpace(s.Text())
		if text == "" {
			continue
		}

		// see: man 5 passwd
		//  name:password:UID:GID:GECOS:directory:shell
		// Name:Pass:Uid:Gid:Gecos:Home:Shell
		//  root:x:0:0:root:/root:/bin/bash
		//  adm:x:3:4:adm:/var/adm:/bin/false
		p := User{}
		parseLine(
			text,
			&p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell,
		)

		if filter == nil || filter(p) {
			out = append(out, p)
		}
	}
	// The scanner only reports a failure once Scan has returned false, so the
	// error has to be checked after the loop; checking it inside the loop body
	// never observes it and silently truncates the result.
	if err := s.Err(); err != nil {
		return nil, err
	}

	return out, nil
}

// ParseGroupFile opens the group-formatted file at path and returns every
// entry in it. The error from os.Open is returned unchanged when the file
// cannot be opened; the file is closed before returning.
func ParseGroupFile(path string) ([]Group, error) {
	group, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer group.Close()
	return ParseGroup(group)
}

// ParseGroup returns every entry read from the group-formatted reader. It is
// ParseGroupFilter with no filter.
func ParseGroup(group io.Reader) ([]Group, error) {
	return ParseGroupFilter(group, nil)
}

// ParseGroupFileFilter opens the group-formatted file at path and returns the
// entries for which filter reports true (all entries when filter is nil).
func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) {
	group, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer group.Close()
	return ParseGroupFilter(group, filter)
}

// ParseGroupFilter reads group-formatted data from r line by line and returns
// the entries for which filter reports true. A nil filter accepts every
// entry. Lines that are empty after trimming whitespace are skipped, matching
// ParsePasswdFilter.
//
// It returns an error when r is nil or when reading from r fails; in both
// cases the returned slice is nil. On success the slice is never nil.
func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
	if r == nil {
		return nil, fmt.Errorf("nil source for group-formatted data")
	}

	var (
		s   = bufio.NewScanner(r)
		out = []Group{}
	)

	for s.Scan() {
		text := strings.TrimSpace(s.Text())
		if text == "" {
			continue
		}

		// see: man 5 group
		//  group_name:password:GID:user_list
		// Name:Pass:Gid:List
		//  root:x:0:root
		//  adm:x:4:root,adm,daemon
		p := Group{}
		parseLine(
			text,
			&p.Name, &p.Pass, &p.Gid, &p.List,
		)

		if filter == nil || filter(p) {
			out = append(out, p)
		}
	}
	// See ParsePasswdFilter: scanner errors are only visible after the loop.
	if err := s.Err(); err != nil {
		return nil, err
	}

	return out, nil
}

// ExecUser is the result of resolving a user specification: the uid and
// primary gid to run as, the supplementary gids, and the home directory.
type ExecUser struct {
	Uid, Gid int
	Sgids    []int
	Home     string
}

// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the
// given file paths and uses that data as the arguments to GetExecUser. If the
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
	// Hold the sources as io.Reader values. Storing a nil *os.File in an
	// io.Reader yields a non-nil interface, which would defeat the
	// "source != nil" checks GetExecUser uses to tolerate missing files.
	var passwd, group io.Reader

	if f, err := os.Open(passwdPath); err == nil {
		passwd = f
		defer f.Close()
	}

	if f, err := os.Open(groupPath); err == nil {
		group = f
		defer f.Close()
	}

	return GetExecUser(userSpec, defaults, passwd, group)
}

// GetExecUser parses a user specification string (using the passwd and group
// readers as sources for /etc/passwd and /etc/group data, respectively). In
// the case of blank fields or missing data from the sources, the values in
// defaults are used. A nil defaults is treated as a zero ExecUser, and the
// returned Sgids slice is never nil.
//
// GetExecUser will return an error if a user or group literal could not be
// found in any entry in passwd and group respectively. A numeric uid or gid
// that has no matching entry is accepted as-is, provided it lies within the
// minId..maxId range declared elsewhere in this package; otherwise ErrRange
// is returned. A nil passwd or group reader is tolerated and treated as
// having no entries; a read error on a non-nil reader is returned.
//
// Examples of valid user specifications are:
//     * ""
//     * "user"
//     * "uid"
//     * "user:group"
//     * "uid:gid"
//     * "user:gid"
//     * "uid:group"
func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) {
	var (
		userArg, groupArg string
		name              string
	)

	if defaults == nil {
		defaults = new(ExecUser)
	}

	// Copy over defaults.
	user := &ExecUser{
		Uid:   defaults.Uid,
		Gid:   defaults.Gid,
		Sgids: defaults.Sgids,
		Home:  defaults.Home,
	}

	// Sgids slice *cannot* be nil.
	if user.Sgids == nil {
		user.Sgids = []int{}
	}

	// allow for userArg to have either "user" syntax, or optionally "user:group" syntax
	parseLine(userSpec, &userArg, &groupArg)

	users, err := ParsePasswdFilter(passwd, func(u User) bool {
		if userArg == "" {
			return u.Uid == user.Uid
		}
		return u.Name == userArg || strconv.Itoa(u.Uid) == userArg
	})
	if err != nil && passwd != nil {
		if userArg == "" {
			userArg = strconv.Itoa(user.Uid)
		}
		return nil, fmt.Errorf("Unable to find user %v: %v", userArg, err)
	}

	if len(users) > 0 {
		// if we found any user entries that matched our filter, let's take the first one as "correct"
		name = users[0].Name
		user.Uid = users[0].Uid
		user.Gid = users[0].Gid
		user.Home = users[0].Home
	} else if userArg != "" {
		// we asked for a user but didn't find them...  let's check to see if we wanted a numeric user
		user.Uid, err = strconv.Atoi(userArg)
		if err != nil {
			// not numeric - we have to bail
			return nil, fmt.Errorf("Unable to find user %v", userArg)
		}

		// Must be inside valid uid range.
		if user.Uid < minId || user.Uid > maxId {
			return nil, ErrRange
		}

		// if userArg couldn't be found in /etc/passwd but is numeric, just roll with it - this is legit
	}

	if groupArg != "" || name != "" {
		groups, err := ParseGroupFilter(group, func(g Group) bool {
			// Explicit group format takes precedence.
			if groupArg != "" {
				return g.Name == groupArg || strconv.Itoa(g.Gid) == groupArg
			}

			// Check if user is a member.
			for _, u := range g.List {
				if u == name {
					return true
				}
			}

			return false
		})
		if err != nil && group != nil {
			// No passwd entry may have matched (for example a bare numeric
			// uid combined with an explicit group), so users can be empty
			// here. Describe the user from what is known instead of indexing
			// users[0], which would panic.
			who := name
			if who == "" {
				who = userArg
			}
			if who == "" {
				who = strconv.Itoa(user.Uid)
			}
			return nil, fmt.Errorf("Unable to find groups for user %v: %v", who, err)
		}

		haveGroup := len(groups) > 0
		if groupArg != "" {
			if haveGroup {
				// if we found any group entries that matched our filter, let's take the first one as "correct"
				user.Gid = groups[0].Gid
			} else {
				// we asked for a group but didn't find id...  let's check to see if we wanted a numeric group
				user.Gid, err = strconv.Atoi(groupArg)
				if err != nil {
					// not numeric - we have to bail
					return nil, fmt.Errorf("Unable to find group %v", groupArg)
				}

				// Ensure gid is inside gid range.
				if user.Gid < minId || user.Gid > maxId {
					return nil, ErrRange
				}

				// if groupArg couldn't be found in /etc/group but is numeric, just roll with it - this is legit
			}
		} else if haveGroup {
			// If implicit group format, fill supplementary gids.
			user.Sgids = make([]int, len(groups))
			for i, g := range groups {
				user.Sgids[i] = g.Gid
			}
		}
	}

	return user, nil
}
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/user/lookup_unix.go0000644000175000017500000000102112524212370024031 0ustar tianontianon// +build darwin dragonfly freebsd linux netbsd openbsd solaris package user import ( "io" "os" ) // Unix-specific path to the passwd and group formatted files.
const ( unixPasswdPath = "/etc/passwd" unixGroupPath = "/etc/group" ) func GetPasswdPath() (string, error) { return unixPasswdPath, nil } func GetPasswd() (io.ReadCloser, error) { return os.Open(unixPasswdPath) } func GetGroupPath() (string, error) { return unixGroupPath, nil } func GetGroup() (io.ReadCloser, error) { return os.Open(unixGroupPath) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/user/user_test.go0000644000175000017500000001620412524212370023503 0ustar tianontianonpackage user import ( "io" "reflect" "strings" "testing" ) func TestUserParseLine(t *testing.T) { var ( a, b string c []string d int ) parseLine("", &a, &b) if a != "" || b != "" { t.Fatalf("a and b should be empty ('%v', '%v')", a, b) } parseLine("a", &a, &b) if a != "a" || b != "" { t.Fatalf("a should be 'a' and b should be empty ('%v', '%v')", a, b) } parseLine("bad boys:corny cows", &a, &b) if a != "bad boys" || b != "corny cows" { t.Fatalf("a should be 'bad boys' and b should be 'corny cows' ('%v', '%v')", a, b) } parseLine("", &c) if len(c) != 0 { t.Fatalf("c should be empty (%#v)", c) } parseLine("d,e,f:g:h:i,j,k", &c, &a, &b, &c) if a != "g" || b != "h" || len(c) != 3 || c[0] != "i" || c[1] != "j" || c[2] != "k" { t.Fatalf("a should be 'g', b should be 'h', and c should be ['i','j','k'] ('%v', '%v', '%#v')", a, b, c) } parseLine("::::::::::", &a, &b, &c) if a != "" || b != "" || len(c) != 0 { t.Fatalf("a, b, and c should all be empty ('%v', '%v', '%#v')", a, b, c) } parseLine("not a number", &d) if d != 0 { t.Fatalf("d should be 0 (%v)", d) } parseLine("b:12:c", &a, &d, &b) if a != "b" || b != "c" || d != 12 { t.Fatalf("a should be 'b' and b should be 'c', and d should be 12 ('%v', '%v', %v)", a, b, d) } } func TestUserParsePasswd(t *testing.T) { users, err := ParsePasswdFilter(strings.NewReader(` root:x:0:0:root:/root:/bin/bash adm:x:3:4:adm:/var/adm:/bin/false this is just some garbage data `), nil) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(users) != 3 
{ t.Fatalf("Expected 3 users, got %v", len(users)) } if users[0].Uid != 0 || users[0].Name != "root" { t.Fatalf("Expected users[0] to be 0 - root, got %v - %v", users[0].Uid, users[0].Name) } if users[1].Uid != 3 || users[1].Name != "adm" { t.Fatalf("Expected users[1] to be 3 - adm, got %v - %v", users[1].Uid, users[1].Name) } } func TestUserParseGroup(t *testing.T) { groups, err := ParseGroupFilter(strings.NewReader(` root:x:0:root adm:x:4:root,adm,daemon this is just some garbage data `), nil) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(groups) != 3 { t.Fatalf("Expected 3 groups, got %v", len(groups)) } if groups[0].Gid != 0 || groups[0].Name != "root" || len(groups[0].List) != 1 { t.Fatalf("Expected groups[0] to be 0 - root - 1 member, got %v - %v - %v", groups[0].Gid, groups[0].Name, len(groups[0].List)) } if groups[1].Gid != 4 || groups[1].Name != "adm" || len(groups[1].List) != 3 { t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - %v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List)) } } func TestValidGetExecUser(t *testing.T) { const passwdContent = ` root:x:0:0:root user:/root:/bin/bash adm:x:42:43:adm:/var/adm:/bin/false this is just some garbage data ` const groupContent = ` root:x:0:root adm:x:43: grp:x:1234:root,adm this is just some garbage data ` defaultExecUser := ExecUser{ Uid: 8888, Gid: 8888, Sgids: []int{8888}, Home: "/8888", } tests := []struct { ref string expected ExecUser }{ { ref: "root", expected: ExecUser{ Uid: 0, Gid: 0, Sgids: []int{0, 1234}, Home: "/root", }, }, { ref: "adm", expected: ExecUser{ Uid: 42, Gid: 43, Sgids: []int{1234}, Home: "/var/adm", }, }, { ref: "root:adm", expected: ExecUser{ Uid: 0, Gid: 43, Sgids: defaultExecUser.Sgids, Home: "/root", }, }, { ref: "adm:1234", expected: ExecUser{ Uid: 42, Gid: 1234, Sgids: defaultExecUser.Sgids, Home: "/var/adm", }, }, { ref: "42:1234", expected: ExecUser{ Uid: 42, Gid: 1234, Sgids: defaultExecUser.Sgids, Home: "/var/adm", }, }, { ref: 
"1337:1234", expected: ExecUser{ Uid: 1337, Gid: 1234, Sgids: defaultExecUser.Sgids, Home: defaultExecUser.Home, }, }, { ref: "1337", expected: ExecUser{ Uid: 1337, Gid: defaultExecUser.Gid, Sgids: defaultExecUser.Sgids, Home: defaultExecUser.Home, }, }, { ref: "", expected: ExecUser{ Uid: defaultExecUser.Uid, Gid: defaultExecUser.Gid, Sgids: defaultExecUser.Sgids, Home: defaultExecUser.Home, }, }, } for _, test := range tests { passwd := strings.NewReader(passwdContent) group := strings.NewReader(groupContent) execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group) if err != nil { t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error()) t.Fail() continue } if !reflect.DeepEqual(test.expected, *execUser) { t.Logf("got: %#v", execUser) t.Logf("expected: %#v", test.expected) t.Fail() continue } } } func TestInvalidGetExecUser(t *testing.T) { const passwdContent = ` root:x:0:0:root user:/root:/bin/bash adm:x:42:43:adm:/var/adm:/bin/false this is just some garbage data ` const groupContent = ` root:x:0:root adm:x:43: grp:x:1234:root,adm this is just some garbage data ` tests := []string{ // No such user/group. "notuser", "notuser:notgroup", "root:notgroup", "notuser:adm", "8888:notgroup", "notuser:8888", // Invalid user/group values. 
"-1:0", "0:-3", "-5:-2", } for _, test := range tests { passwd := strings.NewReader(passwdContent) group := strings.NewReader(groupContent) execUser, err := GetExecUser(test, nil, passwd, group) if err == nil { t.Logf("got unexpected success when parsing '%s': %#v", test, execUser) t.Fail() continue } } } func TestGetExecUserNilSources(t *testing.T) { const passwdContent = ` root:x:0:0:root user:/root:/bin/bash adm:x:42:43:adm:/var/adm:/bin/false this is just some garbage data ` const groupContent = ` root:x:0:root adm:x:43: grp:x:1234:root,adm this is just some garbage data ` defaultExecUser := ExecUser{ Uid: 8888, Gid: 8888, Sgids: []int{8888}, Home: "/8888", } tests := []struct { ref string passwd, group bool expected ExecUser }{ { ref: "", passwd: false, group: false, expected: ExecUser{ Uid: 8888, Gid: 8888, Sgids: []int{8888}, Home: "/8888", }, }, { ref: "root", passwd: true, group: false, expected: ExecUser{ Uid: 0, Gid: 0, Sgids: []int{8888}, Home: "/root", }, }, { ref: "0", passwd: false, group: false, expected: ExecUser{ Uid: 0, Gid: 8888, Sgids: []int{8888}, Home: "/8888", }, }, { ref: "0:0", passwd: false, group: false, expected: ExecUser{ Uid: 0, Gid: 0, Sgids: []int{8888}, Home: "/8888", }, }, } for _, test := range tests { var passwd, group io.Reader if test.passwd { passwd = strings.NewReader(passwdContent) } if test.group { group = strings.NewReader(groupContent) } execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group) if err != nil { t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error()) t.Fail() continue } if !reflect.DeepEqual(test.expected, *execUser) { t.Logf("got: %#v", execUser) t.Logf("expected: %#v", test.expected) t.Fail() continue } } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/network_linux.go0000644000175000017500000001277312524212370023427 0ustar tianontianon// +build linux package libcontainer import ( "fmt" "io/ioutil" "net" "path/filepath" "strconv" "strings" 
"github.com/docker/libcontainer/netlink" "github.com/docker/libcontainer/utils" )

// strategies maps a network type name to the strategy that implements it.
var strategies = map[string]networkStrategy{
	"veth":     &veth{},
	"loopback": &loopback{},
}

// networkStrategy is one way of configuring a container's networking stack.
// create receives the network description together with an int (named nspid
// by the implementations in this file); initialize receives only the network
// description.
type networkStrategy interface {
	create(*network, int) error
	initialize(*network) error
}

// getStrategy looks up the strategy registered under tpe and returns an
// error for an unregistered type name.
func getStrategy(tpe string) (networkStrategy, error) {
	if strategy, ok := strategies[tpe]; ok {
		return strategy, nil
	}
	return nil, fmt.Errorf("unknown strategy type %q", tpe)
}

// getNetworkInterfaceStats returns the traffic counters of the named
// interface, read from sysfs. An empty interfaceName yields a NetworkInterface
// with all counters left at zero; any read or parse failure aborts with that
// error and a nil result.
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
	stats := &NetworkInterface{Name: interfaceName}
	// This can happen if the network runtime information is missing - possible if the
	// container was created by an old version of libcontainer.
	if interfaceName == "" {
		return stats, nil
	}

	// Ingress for host veth is from the container. Hence tx_bytes stat on the
	// host veth is actually number of bytes received by the container, so the
	// rx/tx files are deliberately crossed over below.
	counters := []struct {
		dst  *uint64 // where the parsed value is stored
		file string  // statistics file to read
	}{
		{&stats.RxBytes, "tx_bytes"},
		{&stats.RxPackets, "tx_packets"},
		{&stats.RxErrors, "tx_errors"},
		{&stats.RxDropped, "tx_dropped"},
		{&stats.TxBytes, "rx_bytes"},
		{&stats.TxPackets, "rx_packets"},
		{&stats.TxErrors, "rx_errors"},
		{&stats.TxDropped, "rx_dropped"},
	}
	for i := range counters {
		value, err := readSysfsNetworkStats(interfaceName, counters[i].file)
		if err != nil {
			return nil, err
		}
		*counters[i].dst = value
	}
	return stats, nil
}

// readSysfsNetworkStats reads /sys/class/net/<ethInterface>/statistics/<statsFile>
// and parses its whitespace-trimmed content as a base-10 unsigned 64-bit
// integer.
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
	statPath := filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)
	raw, err := ioutil.ReadFile(statPath)
	if err != nil {
		return 0, err
	}
	digits := strings.TrimSpace(string(raw))
	return strconv.ParseUint(digits, 10, 64)
}

// loopback is a network strategy that provides a basic loopback device
type loopback struct {
}

// create is a no-op for the loopback strategy.
func (l *loopback) create(n *network, nspid int) error {
	return nil
}

// initialize looks up the "lo" interface and brings it up.
func (l *loopback) initialize(config *network) error {
	lo, err := net.InterfaceByName("lo")
	if err != nil {
		return err
	}
	return netlink.NetworkLinkUp(lo)
}

// veth is a network strategy that uses a bridge and creates
// a veth pair, one that is attached to the bridge on the host and the other
// is placed inside the container's namespace
type veth struct {
}

// create builds the veth pair for n, attaches the host side to n.Bridge,
// applies MTU and (optionally) hairpin mode, brings the host side up and
// finally hands the peer to nspid via netlink.NetworkSetNsPid. The generated
// peer name is stored in n.TempVethPeerName. On any failure both interface
// names are passed to netlink.NetworkLinkDel before returning.
func (v *veth) create(n *network, nspid int) (err error) {
	peerName, err := v.generateTempPeerName()
	if err != nil {
		return err
	}
	n.TempVethPeerName = peerName

	// err is the named result, so this sees the error of whichever return
	// statement below fired.
	defer func() {
		if err == nil {
			return
		}
		netlink.NetworkLinkDel(n.HostInterfaceName)
		netlink.NetworkLinkDel(n.TempVethPeerName)
	}()

	if n.Bridge == "" {
		return fmt.Errorf("bridge is not specified")
	}
	br, err := net.InterfaceByName(n.Bridge)
	if err != nil {
		return err
	}

	err = netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen)
	if err != nil {
		return err
	}

	hostIface, err := net.InterfaceByName(n.HostInterfaceName)
	if err != nil {
		return err
	}
	if err = netlink.AddToBridge(hostIface, br); err != nil {
		return err
	}
	if err = netlink.NetworkSetMTU(hostIface, n.Mtu); err != nil {
		return err
	}
	if n.HairpinMode {
		if err = netlink.SetHairpinMode(hostIface, true); err != nil {
			return err
		}
	}
	if err = netlink.NetworkLinkUp(hostIface); err != nil {
		return err
	}

	peerIface, err := net.InterfaceByName(n.TempVethPeerName)
	if err != nil {
		return err
	}
	return netlink.NetworkSetNsPid(peerIface, nspid)
}

// generateTempPeerName returns a random interface name built by
// utils.GenerateRandomName with the prefix "veth" and size argument 7.
func (v *veth) generateTempPeerName() (string, error) {
	return utils.GenerateRandomName("veth", 7)
}

// initialize configures the peer interface named by config.TempVethPeerName:
// it takes the link down, renames it to config.Name, optionally sets the MAC
// address, adds the IPv4 address (and the IPv6 address when given), sets the
// MTU, brings the link up and installs the default gateways that are
// configured. It fails if no peer name is set or config.Address is not valid
// CIDR notation.
func (v *veth) initialize(config *network) error {
	peerName := config.TempVethPeerName
	if peerName == "" {
		return fmt.Errorf("peer is not specified")
	}

	iface, err := net.InterfaceByName(peerName)
	if err != nil {
		return err
	}
	if err = netlink.NetworkLinkDown(iface); err != nil {
		return err
	}
	if err = netlink.NetworkChangeName(iface, config.Name); err != nil {
		return err
	}

	// get the interface again after we changed the name as the index also changes.
	iface, err = net.InterfaceByName(config.Name)
	if err != nil {
		return err
	}

	if config.MacAddress != "" {
		if err = netlink.NetworkSetMacAddress(iface, config.MacAddress); err != nil {
			return err
		}
	}

	addr, subnet, err := net.ParseCIDR(config.Address)
	if err != nil {
		return err
	}
	if err = netlink.NetworkLinkAddIp(iface, addr, subnet); err != nil {
		return err
	}

	if config.IPv6Address != "" {
		addr, subnet, err = net.ParseCIDR(config.IPv6Address)
		if err != nil {
			return err
		}
		if err = netlink.NetworkLinkAddIp(iface, addr, subnet); err != nil {
			return err
		}
	}

	if err = netlink.NetworkSetMTU(iface, config.Mtu); err != nil {
		return err
	}
	if err = netlink.NetworkLinkUp(iface); err != nil {
		return err
	}

	// IPv4 gateway first, then IPv6; empty entries are skipped.
	for _, gateway := range []string{config.Gateway, config.IPv6Gateway} {
		if gateway == "" {
			continue
		}
		if err = netlink.AddDefaultGw(gateway, config.Name); err != nil {
			return err
		}
	}
	return nil
}
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/sample_configs/0000755000175000017500000000000012524212370023147 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/sample_configs/minimal.json0000644000175000017500000001225212524212370025472 0ustar tianontianon{ "no_pivot_root": false, "parent_death_signal": 0, "pivot_dir": "", "rootfs": "/home/michael/development/gocode/src/github.com/docker/libcontainer", "readonlyfs": false, "mounts": [ { "source": "shm", "destination": "/dev/shm", "device": "tmpfs", "flags": 14, "data": "mode=1777,size=65536k", "relabel": "" }, { "source": "mqueue", "destination": "/dev/mqueue", "device": "mqueue", "flags": 14, "data": "", "relabel": "" }, { "source": "sysfs", "destination": "/sys", "device": "sysfs", "flags": 15, "data": "", "relabel": "" } ], "devices": [ { "type": 99, "path": "/dev/fuse", "major": 10, "minor": 229, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438,
"uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "mount_label": "", "hostname": "nsinit", "namespaces": [ { "type": "NEWNS", "path": "" }, { "type": "NEWUTS", "path": "" }, { "type": "NEWIPC", "path": "" }, { "type": "NEWPID", "path": "" }, { "type": "NEWNET", "path": "" } ], "capabilities": [ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE" ], "networks": [ { "type": "loopback", "name": "", "bridge": "", "mac_address": "", "address": "127.0.0.1/0", "gateway": "localhost", "ipv6_address": "", "ipv6_gateway": "", "mtu": 0, "txqueuelen": 0, "host_interface_name": "" } ], "routes": null, "cgroups": { "name": "libcontainer", "parent": "nsinit", "allow_all_devices": false, "allowed_devices": [ { "type": 99, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 98, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/console", "major": 5, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty0", "major": 4, "minor": 0, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty1", "major": 4, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": 
"", "major": 136, "minor": -1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 5, "minor": 2, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 10, "minor": 200, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "memory": 0, "memory_reservation": 0, "memory_swap": 0, "cpu_shares": 0, "cpu_quota": 0, "cpu_period": 0, "cpuset_cpus": "", "cpuset_mems": "", "blkio_weight": 0, "freezer": "", "slice": "" }, "apparmor_profile": "", "process_label": "", "rlimits": [ { "type": 7, "hard": 1024, "soft": 1024 } ], "additional_groups": null, "uid_mappings": null, "gid_mappings": null, "mask_paths": [ "/proc/kcore" ], "readonly_paths": [ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus" ] } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/sample_configs/README.md0000644000175000017500000000040112524212370024421 0ustar tianontianonThese configuration files can be used with `nsinit` to quickly develop, test, and experiment with features of libcontainer. When consuming these configuration files, copy them into your rootfs and rename the file to `container.json` for use with `nsinit`. 
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/sample_configs/host-pid.json0000644000175000017500000001211712524212370025573 0ustar tianontianon{ "no_pivot_root": false, "parent_death_signal": 0, "pivot_dir": "", "rootfs": "/rootfs/jessie", "readonlyfs": false, "mounts": [ { "source": "shm", "destination": "/dev/shm", "device": "tmpfs", "flags": 14, "data": "mode=1777,size=65536k", "relabel": "" }, { "source": "mqueue", "destination": "/dev/mqueue", "device": "mqueue", "flags": 14, "data": "", "relabel": "" }, { "source": "sysfs", "destination": "/sys", "device": "sysfs", "flags": 15, "data": "", "relabel": "" } ], "devices": [ { "type": 99, "path": "/dev/fuse", "major": 10, "minor": 229, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "mount_label": "", "hostname": "nsinit", "namespaces": [ { "type": "NEWNS", "path": "" }, { "type": "NEWUTS", "path": "" }, { "type": "NEWIPC", "path": "" }, { "type": "NEWNET", "path": "" } ], "capabilities": [ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE" ], "networks": [ { "type": "loopback", "name": "", "bridge": "", "mac_address": "", "address": "127.0.0.1/0", "gateway": 
"localhost", "ipv6_address": "", "ipv6_gateway": "", "mtu": 0, "txqueuelen": 0, "host_interface_name": "" } ], "routes": null, "cgroups": { "name": "libcontainer", "parent": "nsinit", "allow_all_devices": false, "allowed_devices": [ { "type": 99, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 98, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/console", "major": 5, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty0", "major": 4, "minor": 0, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty1", "major": 4, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 136, "minor": -1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 5, "minor": 2, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 10, "minor": 200, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "memory": 0, "memory_reservation": 0, "memory_swap": 0, "cpu_shares": 0, "cpu_quota": 0, "cpu_period": 0, 
"cpuset_cpus": "", "cpuset_mems": "", "blkio_weight": 0, "freezer": "", "slice": "" }, "apparmor_profile": "", "process_label": "", "rlimits": [ { "type": 7, "hard": 1024, "soft": 1024 } ], "additional_groups": null, "uid_mappings": null, "gid_mappings": null, "mask_paths": [ "/proc/kcore" ], "readonly_paths": [ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus" ] } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/sample_configs/attach_to_bridge.json0000644000175000017500000001261512524212370027331 0ustar tianontianon{ "no_pivot_root": false, "parent_death_signal": 0, "pivot_dir": "", "rootfs": "/rootfs/jessie", "readonlyfs": false, "mounts": [ { "source": "shm", "destination": "/dev/shm", "device": "tmpfs", "flags": 14, "data": "mode=1777,size=65536k", "relabel": "" }, { "source": "mqueue", "destination": "/dev/mqueue", "device": "mqueue", "flags": 14, "data": "", "relabel": "" }, { "source": "sysfs", "destination": "/sys", "device": "sysfs", "flags": 15, "data": "", "relabel": "" } ], "devices": [ { "type": 99, "path": "/dev/fuse", "major": 10, "minor": 229, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "mount_label": "", "hostname": "koye", "namespaces": [ { "type": "NEWNS", "path": "" }, { "type": "NEWUTS", "path": 
"" }, { "type": "NEWIPC", "path": "" }, { "type": "NEWPID", "path": "" }, { "type": "NEWNET", "path": "" } ], "capabilities": [ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE" ], "networks": [ { "type": "loopback", "name": "", "bridge": "", "mac_address": "", "address": "127.0.0.1/0", "gateway": "localhost", "ipv6_address": "", "ipv6_gateway": "", "mtu": 0, "txqueuelen": 0, "host_interface_name": "" }, { "type": "veth", "name": "eth0", "bridge": "docker0", "mac_address": "", "address": "172.17.0.101/16", "gateway": "172.17.42.1", "ipv6_address": "", "ipv6_gateway": "", "mtu": 1500, "txqueuelen": 0, "host_interface_name": "vethnsinit" } ], "routes": null, "cgroups": { "name": "libcontainer", "parent": "nsinit", "allow_all_devices": false, "allowed_devices": [ { "type": 99, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 98, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/console", "major": 5, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty0", "major": 4, "minor": 0, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty1", "major": 4, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 136, "minor": -1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 5, "minor": 2, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 10, "minor": 200, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": 
"rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "memory": 0, "memory_reservation": 0, "memory_swap": 0, "cpu_shares": 0, "cpu_quota": 0, "cpu_period": 0, "cpuset_cpus": "", "cpuset_mems": "", "blkio_weight": 0, "freezer": "", "slice": "" }, "apparmor_profile": "", "process_label": "", "rlimits": [ { "type": 7, "hard": 1024, "soft": 1024 } ], "additional_groups": null, "uid_mappings": null, "gid_mappings": null, "mask_paths": [ "/proc/kcore" ], "readonly_paths": [ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus" ] } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/sample_configs/apparmor.json0000644000175000017500000001220312524212370025661 0ustar tianontianon{ "no_pivot_root": false, "parent_death_signal": 0, "pivot_dir": "", "rootfs": "/rootfs/jessie", "readonlyfs": false, "mounts": [ { "source": "shm", "destination": "/dev/shm", "device": "tmpfs", "flags": 14, "data": "mode=1777,size=65536k", "relabel": "" }, { "source": "mqueue", "destination": "/dev/mqueue", "device": "mqueue", "flags": 14, "data": "", "relabel": "" }, { "source": "sysfs", "destination": "/sys", "device": "sysfs", "flags": 15, "data": "", "relabel": "" } ], "devices": [ { "type": 99, "path": "/dev/fuse", "major": 10, "minor": 229, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", 
"file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "mount_label": "", "hostname": "nsinit", "namespaces": [ { "type": "NEWNS", "path": "" }, { "type": "NEWUTS", "path": "" }, { "type": "NEWIPC", "path": "" }, { "type": "NEWPID", "path": "" }, { "type": "NEWNET", "path": "" } ], "capabilities": [ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE" ], "networks": [ { "type": "loopback", "name": "", "bridge": "", "mac_address": "", "address": "127.0.0.1/0", "gateway": "localhost", "ipv6_address": "", "ipv6_gateway": "", "mtu": 0, "txqueuelen": 0, "host_interface_name": "" } ], "routes": null, "cgroups": { "name": "libcontainer", "parent": "nsinit", "allow_all_devices": false, "allowed_devices": [ { "type": 99, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 98, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/console", "major": 5, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty0", "major": 4, "minor": 0, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty1", "major": 4, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 136, "minor": -1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, 
"path": "", "major": 5, "minor": 2, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 10, "minor": 200, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "memory": 0, "memory_reservation": 0, "memory_swap": 0, "cpu_shares": 0, "cpu_quota": 0, "cpu_period": 0, "cpuset_cpus": "", "cpuset_mems": "", "blkio_weight": 0, "freezer": "", "slice": "" }, "apparmor_profile": "docker-default", "process_label": "", "rlimits": [ { "type": 7, "hard": 1024, "soft": 1024 } ], "additional_groups": null, "uid_mappings": null, "gid_mappings": null, "mask_paths": [ "/proc/kcore" ], "readonly_paths": [ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus" ] } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/sample_configs/selinux.json0000644000175000017500000001232112524212370025530 0ustar tianontianon{ "no_pivot_root": false, "parent_death_signal": 0, "pivot_dir": "", "rootfs": "/rootfs/jessie", "readonlyfs": false, "mounts": [ { "source": "shm", "destination": "/dev/shm", "device": "tmpfs", "flags": 14, "data": "mode=1777,size=65536k", "relabel": "" }, { "source": "mqueue", "destination": "/dev/mqueue", "device": "mqueue", "flags": 14, "data": "", "relabel": "" }, { "source": "sysfs", "destination": 
"/sys", "device": "sysfs", "flags": 15, "data": "", "relabel": "" } ], "devices": [ { "type": 99, "path": "/dev/fuse", "major": 10, "minor": 229, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "mount_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475", "hostname": "nsinit", "namespaces": [ { "type": "NEWNS", "path": "" }, { "type": "NEWUTS", "path": "" }, { "type": "NEWIPC", "path": "" }, { "type": "NEWPID", "path": "" }, { "type": "NEWNET", "path": "" } ], "capabilities": [ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE" ], "networks": [ { "type": "loopback", "name": "", "bridge": "", "mac_address": "", "address": "127.0.0.1/0", "gateway": "localhost", "ipv6_address": "", "ipv6_gateway": "", "mtu": 0, "txqueuelen": 0, "host_interface_name": "" } ], "routes": null, "cgroups": { "name": "libcontainer", "parent": "nsinit", "allow_all_devices": false, "allowed_devices": [ { "type": 99, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 98, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": 
"/dev/console", "major": 5, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty0", "major": 4, "minor": 0, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty1", "major": 4, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 136, "minor": -1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 5, "minor": 2, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 10, "minor": 200, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "memory": 0, "memory_reservation": 0, "memory_swap": 0, "cpu_shares": 0, "cpu_quota": 0, "cpu_period": 0, "cpuset_cpus": "", "cpuset_mems": "", "blkio_weight": 0, "freezer": "", "slice": "" }, "apparmor_profile": "", "process_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475", "rlimits": [ { "type": 7, "hard": 1024, "soft": 1024 } ], "additional_groups": null, "uid_mappings": null, "gid_mappings": null, "mask_paths": [ "/proc/kcore" ], "readonly_paths": [ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus" ] } 
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/sample_configs/userns.json0000644000175000017500000001306612524212370025367 0ustar tianontianon{ "no_pivot_root": false, "parent_death_signal": 0, "pivot_dir": "", "rootfs": "/rootfs/jessie", "readonlyfs": false, "mounts": [ { "source": "shm", "destination": "/dev/shm", "device": "tmpfs", "flags": 14, "data": "mode=1777,size=65536k", "relabel": "" }, { "source": "mqueue", "destination": "/dev/mqueue", "device": "mqueue", "flags": 14, "data": "", "relabel": "" }, { "source": "sysfs", "destination": "/sys", "device": "sysfs", "flags": 15, "data": "", "relabel": "" } ], "devices": [ { "type": 99, "path": "/dev/fuse", "major": 10, "minor": 229, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "mount_label": "", "hostname": "nsinit", "namespaces": [ { "type": "NEWNS", "path": "" }, { "type": "NEWUTS", "path": "" }, { "type": "NEWIPC", "path": "" }, { "type": "NEWPID", "path": "" }, { "type": "NEWNET", "path": "" }, { "type": "NEWUSER", "path": "" } ], "capabilities": [ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE" ], "networks": [ { "type": "loopback", "name": "", "bridge": 
"", "mac_address": "", "address": "127.0.0.1/0", "gateway": "localhost", "ipv6_address": "", "ipv6_gateway": "", "mtu": 0, "txqueuelen": 0, "host_interface_name": "" } ], "routes": null, "cgroups": { "name": "libcontainer", "parent": "nsinit", "allow_all_devices": false, "allowed_devices": [ { "type": 99, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 98, "path": "", "major": -1, "minor": -1, "permissions": "m", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/console", "major": 5, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty0", "major": 4, "minor": 0, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty1", "major": 4, "minor": 1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 136, "minor": -1, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 5, "minor": 2, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "", "major": 10, "minor": 200, "permissions": "rwm", "file_mode": 0, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/null", "major": 1, "minor": 3, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/zero", "major": 1, "minor": 5, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/full", "major": 1, "minor": 7, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/tty", "major": 5, "minor": 0, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/urandom", "major": 1, "minor": 9, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 }, { "type": 99, "path": "/dev/random", "major": 1, "minor": 8, "permissions": "rwm", "file_mode": 438, "uid": 0, "gid": 0 } ], "memory": 0, "memory_reservation": 0, "memory_swap": 0, 
"cpu_shares": 0, "cpu_quota": 0, "cpu_period": 0, "cpuset_cpus": "", "cpuset_mems": "", "blkio_weight": 0, "freezer": "", "slice": "" }, "apparmor_profile": "", "process_label": "", "rlimits": [ { "type": 7, "hard": 1024, "soft": 1024 } ], "additional_groups": null, "uid_mappings": [ { "container_id": 0, "host_id": 1000, "size": 1 }, { "container_id": 1, "host_id": 1, "size": 999 }, { "container_id": 1001, "host_id": 1001, "size": 2147482647 } ], "gid_mappings": [ { "container_id": 0, "host_id": 1000, "size": 1 }, { "container_id": 1, "host_id": 1, "size": 999 }, { "container_id": 1001, "host_id": 1001, "size": 2147482647 } ], "mask_paths": [ "/proc/kcore" ], "readonly_paths": [ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus" ] } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/utils/0000755000175000017500000000000012524212370021316 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/utils/utils_test.go0000644000175000017500000000045212524212370024045 0ustar tianontianonpackage utils import "testing" func TestGenerateName(t *testing.T) { name, err := GenerateRandomName("veth", 5) if err != nil { t.Fatal(err) } expected := 5 + len("veth") if len(name) != 5+len("veth") { t.Fatalf("expected name to be %d chars but received %d", expected, len(name)) } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/utils/utils.go0000644000175000017500000000330012524212370023001 0ustar tianontianonpackage utils import ( "crypto/rand" "encoding/hex" "io" "io/ioutil" "path/filepath" "strconv" "syscall" ) const ( exitSignalOffset = 128 ) // GenerateRandomName returns a new name joined with a prefix. 
This size // specified is used to truncate the randomly generated value func GenerateRandomName(prefix string, size int) (string, error) { id := make([]byte, 32) if _, err := io.ReadFull(rand.Reader, id); err != nil { return "", err } return prefix + hex.EncodeToString(id)[:size], nil } // ResolveRootfs ensures that the current working directory is // not a symlink and returns the absolute path to the rootfs func ResolveRootfs(uncleanRootfs string) (string, error) { rootfs, err := filepath.Abs(uncleanRootfs) if err != nil { return "", err } return filepath.EvalSymlinks(rootfs) } func CloseExecFrom(minFd int) error { fdList, err := ioutil.ReadDir("/proc/self/fd") if err != nil { return err } for _, fi := range fdList { fd, err := strconv.Atoi(fi.Name()) if err != nil { // ignore non-numeric file names continue } if fd < minFd { // ignore descriptors lower than our specified minimum continue } // intentionally ignore errors from syscall.CloseOnExec syscall.CloseOnExec(fd) // the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall) } return nil } // ExitStatus returns the correct exit status for a process based on if it // was signaled or existed cleanly. func ExitStatus(status syscall.WaitStatus) int { if status.Signaled() { return exitSignalOffset + int(status.Signal()) } return status.ExitStatus() } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/error.go0000644000175000017500000000217412524212370021642 0ustar tianontianonpackage libcontainer import "io" // API error code type. type ErrorCode int // API error codes. 
const ( // Factory errors IdInUse ErrorCode = iota InvalidIdFormat // Container errors ContainerNotExists ContainerPaused ContainerNotStopped ContainerNotRunning // Process errors ProcessNotExecuted // Common errors ConfigInvalid SystemError ) func (c ErrorCode) String() string { switch c { case IdInUse: return "Id already in use" case InvalidIdFormat: return "Invalid format" case ContainerPaused: return "Container paused" case ConfigInvalid: return "Invalid configuration" case SystemError: return "System error" case ContainerNotExists: return "Container does not exist" case ContainerNotStopped: return "Container is not stopped" case ContainerNotRunning: return "Container is not running" default: return "Unknown error" } } // API Error type. type Error interface { error // Returns a verbose string including the error message // and a representation of the stack trace suitable for // printing. Detail(w io.Writer) error // Returns the error code for this error. Code() ErrorCode } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/MAINTAINERS_GUIDE.md0000644000175000017500000001001612524212370023165 0ustar tianontianon# The libcontainer Maintainers' Guide ## Introduction Dear maintainer. Thank you for investing the time and energy to help make libcontainer as useful as possible. Maintaining a project is difficult, sometimes unrewarding work. Sure, you will get to contribute cool features to the project. But most of your time will be spent reviewing, cleaning up, documenting, answering questions, justifying design decisions - while everyone has all the fun! But remember - the quality of the maintainers work is what distinguishes the good projects from the great. So please be proud of your work, even the unglamourous parts, and encourage a culture of appreciation and respect for *every* aspect of improving the project - not just the hot new features. This document is a manual for maintainers old and new. 
It explains what is expected of maintainers, how they should work, and what tools are available to them. This is a living document - if you see something out of date or missing, speak up! ## What are a maintainer's responsibility? It is every maintainer's responsibility to: * 1) Expose a clear roadmap for improving their component. * 2) Deliver prompt feedback and decisions on pull requests. * 3) Be available to anyone with questions, bug reports, criticism etc. on their component. This includes IRC, GitHub requests and the mailing list. * 4) Make sure their component respects the philosophy, design and roadmap of the project. ## How are decisions made? Short answer: with pull requests to the libcontainer repository. libcontainer is an open-source project with an open design philosophy. This means that the repository is the source of truth for EVERY aspect of the project, including its philosophy, design, roadmap and APIs. *If it's part of the project, it's in the repo. It's in the repo, it's part of the project.* As a result, all decisions can be expressed as changes to the repository. An implementation change is a change to the source code. An API change is a change to the API specification. A philosophy change is a change to the philosophy manifesto. And so on. All decisions affecting libcontainer, big and small, follow the same 3 steps: * Step 1: Open a pull request. Anyone can do this. * Step 2: Discuss the pull request. Anyone can do this. * Step 3: Accept (`LGTM`) or refuse a pull request. The relevant maintainers do this (see below "Who decides what?") ## Who decides what? All decisions are pull requests, and the relevant maintainers make decisions by accepting or refusing the pull request. Review and acceptance by anyone is denoted by adding a comment in the pull request: `LGTM`. However, only currently listed `MAINTAINERS` are counted towards the required two LGTMs. 
libcontainer follows the timeless, highly efficient and totally unfair system known as [Benevolent dictator for life](http://en.wikipedia.org/wiki/Benevolent_Dictator_for_Life), with Michael Crosby in the role of BDFL. This means that all decisions are made by default by Michael. Since making every decision himself would be highly un-scalable, in practice decisions are spread across multiple maintainers. The relevant maintainers for a pull request can be worked out in two steps: * Step 1: Determine the subdirectories affected by the pull request. This might be `netlink/` and `security/`, or any other part of the repo. * Step 2: Find the `MAINTAINERS` file which affects this directory. If the directory itself does not have a `MAINTAINERS` file, work your way up the repo hierarchy until you find one. ### I'm a maintainer, and I'm going on holiday Please let your co-maintainers and other contributors know by raising a pull request that comments out your `MAINTAINERS` file entry using a `#`. ### I'm a maintainer, should I make pull requests too? Yes. Nobody should ever push to master directly. All changes should be made through a pull request. ### Who assigns maintainers? Michael has final `LGTM` approval for all pull requests to `MAINTAINERS` files. ### How is this process changed? Just like everything else: by making a pull request :) libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/hack/0000755000175000017500000000000012524212370021064 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/hack/validate.sh0000755000175000017500000000065212524212370023217 0ustar tianontianon#!/usr/bin/env bash set -e # This script runs all validations validate() { sed -i 's!docker/docker!docker/libcontainer!' /go/src/github.com/docker/docker/hack/make/.validate bash /go/src/github.com/docker/docker/hack/make/validate-dco bash /go/src/github.com/docker/docker/hack/make/validate-gofmt go get golang.org/x/tools/cmd/vet go vet github.com/docker/libcontainer/... 
} # run validations validate libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/console.go0000644000175000017500000000041712524212370022151 0ustar tianontianonpackage libcontainer import "io" // Console represents a pseudo TTY. type Console interface { io.ReadWriter io.Closer // Path returns the filesystem path to the slave side of the pty. Path() string // Fd returns the fd for the master of the pty. Fd() uintptr } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/Makefile0000644000175000017500000000146312524212370021622 0ustar tianontianon all: docker build -t dockercore/libcontainer . test: # we need NET_ADMIN for the netlink tests and SYS_ADMIN for mounting docker run --rm -it --privileged dockercore/libcontainer sh: docker run --rm -it --privileged -w /busybox dockercore/libcontainer nsinit exec sh GO_PACKAGES = $(shell find . -not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u) direct-test: go test $(TEST_TAGS) -cover -v $(GO_PACKAGES) direct-test-short: go test $(TEST_TAGS) -cover -test.short -v $(GO_PACKAGES) direct-build: go build -v $(GO_PACKAGES) direct-install: go install -v $(GO_PACKAGES) local: go test -v validate: hack/validate.sh binary: all docker run --rm --privileged -v $(CURDIR)/bundles:/go/bin dockercore/libcontainer make direct-install libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/container_userns_linux.go0000644000175000017500000000143412524212370025307 0ustar tianontianon// +build go1.4 package libcontainer import "syscall" // Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. 
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error { if c.config.UidMappings != nil { sys.UidMappings = make([]syscall.SysProcIDMap, len(c.config.UidMappings)) for i, um := range c.config.UidMappings { sys.UidMappings[i].ContainerID = um.ContainerID sys.UidMappings[i].HostID = um.HostID sys.UidMappings[i].Size = um.Size } } if c.config.GidMappings != nil { sys.GidMappings = make([]syscall.SysProcIDMap, len(c.config.GidMappings)) for i, gm := range c.config.GidMappings { sys.GidMappings[i].ContainerID = gm.ContainerID sys.GidMappings[i].HostID = gm.HostID sys.GidMappings[i].Size = gm.Size } } return nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/console_linux.go0000644000175000017500000000716212524212370023374 0ustar tianontianon// +build linux package libcontainer import ( "fmt" "os" "path/filepath" "syscall" "unsafe" "github.com/docker/libcontainer/label" ) // newConsole returns an initalized console that can be used within a container by copying bytes // from the master side to the slave that is attached as the tty for the container's init process. func newConsole(uid, gid int) (Console, error) { master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { return nil, err } console, err := ptsname(master) if err != nil { return nil, err } if err := unlockpt(master); err != nil { return nil, err } if err := os.Chmod(console, 0600); err != nil { return nil, err } if err := os.Chown(console, uid, gid); err != nil { return nil, err } return &linuxConsole{ slavePath: console, master: master, }, nil } // newConsoleFromPath is an internal fucntion returning an initialzied console for use inside // a container's MNT namespace. func newConsoleFromPath(slavePath string) *linuxConsole { return &linuxConsole{ slavePath: slavePath, } } // linuxConsole is a linux psuedo TTY for use within a container. 
type linuxConsole struct { master *os.File slavePath string } func (c *linuxConsole) Fd() uintptr { return c.master.Fd() } func (c *linuxConsole) Path() string { return c.slavePath } func (c *linuxConsole) Read(b []byte) (int, error) { return c.master.Read(b) } func (c *linuxConsole) Write(b []byte) (int, error) { return c.master.Write(b) } func (c *linuxConsole) Close() error { if m := c.master; m != nil { return m.Close() } return nil } // mount initializes the console inside the rootfs mounting with the specified mount label // and applying the correct ownership of the console. func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error { oldMask := syscall.Umask(0000) defer syscall.Umask(oldMask) if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil { return err } dest := filepath.Join(rootfs, "/dev/console") f, err := os.Create(dest) if err != nil && !os.IsExist(err) { return err } if f != nil { f.Close() } return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "") } // dupStdio opens the slavePath for the console and dup2s the fds to the current // processes stdio, fd 0,1,2. func (c *linuxConsole) dupStdio() error { slave, err := c.open(syscall.O_RDWR) if err != nil { return err } fd := int(slave.Fd()) for _, i := range []int{0, 1, 2} { if err := syscall.Dup2(fd, i); err != nil { return err } } return nil } // open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave. func (c *linuxConsole) open(flag int) (*os.File, error) { r, e := syscall.Open(c.slavePath, flag, 0) if e != nil { return nil, &os.PathError{ Op: "open", Path: c.slavePath, Err: e, } } return os.NewFile(uintptr(r), c.slavePath), nil } func ioctl(fd uintptr, flag, data uintptr) error { if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 { return err } return nil } // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. 
// unlockpt should be called before opening the slave side of a pty. func unlockpt(f *os.File) error { var u int32 return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) } // ptsname retrieves the name of the first available pts for the given master. func ptsname(f *os.File) (string, error) { var n int32 if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { return "", err } return fmt.Sprintf("/dev/pts/%d", n), nil } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/xattr/0000755000175000017500000000000012524212370021320 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/xattr/xattr_test.go0000644000175000017500000000267012524212370024055 0ustar tianontianon// +build linux package xattr_test import ( "os" "testing" "github.com/docker/libcontainer/xattr" ) func testXattr(t *testing.T) { tmp := "xattr_test" out, err := os.OpenFile(tmp, os.O_WRONLY, 0) if err != nil { t.Fatal("failed") } attr := "user.test" out.Close() if !xattr.XattrEnabled(tmp) { t.Log("Disabled") t.Fatal("failed") } t.Log("Success") err = xattr.Setxattr(tmp, attr, "test") if err != nil { t.Fatal("failed") } var value string value, err = xattr.Getxattr(tmp, attr) if err != nil { t.Fatal("failed") } if value != "test" { t.Fatal("failed") } t.Log("Success") var names []string names, err = xattr.Listxattr(tmp) if err != nil { t.Fatal("failed") } var found int for _, name := range names { if name == attr { found = 1 } } // Listxattr doesn't return trusted.* and system.* namespace // attrs when run in unprevileged mode. 
if found != 1 { t.Fatal("failed") } t.Log("Success") big := "0000000000000000000000000000000000000000000000000000000000000000000008c6419ad822dfe29283fb3ac98dcc5908810cb31f4cfe690040c42c144b7492eicompslf20dxmlpgz" // Test for long xattrs larger than 128 bytes err = xattr.Setxattr(tmp, attr, big) if err != nil { t.Fatal("failed to add long value") } value, err = xattr.Getxattr(tmp, attr) if err != nil { t.Fatal("failed to get long value") } t.Log("Success") if value != big { t.Fatal("failed, value doesn't match") } t.Log("Success") } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/xattr/xattr_unsupported.go0000644000175000017500000000044612524212370025465 0ustar tianontianon// +build !linux package xattr func Listxattr(path string) ([]string, error) { return nil, ErrNotSupportedPlatform } func Getxattr(path, attr string) (string, error) { return "", ErrNotSupportedPlatform } func Setxattr(path, xattr, value string) error { return ErrNotSupportedPlatform } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/xattr/xattr_linux.go0000644000175000017500000000174312524212370024235 0ustar tianontianon// +build linux package xattr import ( "syscall" "github.com/docker/libcontainer/system" ) func XattrEnabled(path string) bool { if Setxattr(path, "user.test", "") == syscall.ENOTSUP { return false } return true } func stringsfromByte(buf []byte) (result []string) { offset := 0 for index, b := range buf { if b == 0 { result = append(result, string(buf[offset:index])) offset = index + 1 } } return } func Listxattr(path string) ([]string, error) { size, err := system.Llistxattr(path, nil) if err != nil { return nil, err } buf := make([]byte, size) read, err := system.Llistxattr(path, buf) if err != nil { return nil, err } names := stringsfromByte(buf[:read]) return names, nil } func Getxattr(path, attr string) (string, error) { value, err := system.Lgetxattr(path, attr) if err != nil { return "", err } return string(value), nil } func Setxattr(path, xattr, value string) 
error { return system.Lsetxattr(path, xattr, []byte(value), 0) } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/xattr/errors.go0000644000175000017500000000025012524212370023160 0ustar tianontianonpackage xattr import ( "fmt" "runtime" ) var ErrNotSupportedPlatform = fmt.Errorf("platform and architecture is not supported %s %s", runtime.GOOS, runtime.GOARCH) libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/label/0000755000175000017500000000000012524212370021235 5ustar tianontianonlibcontainer-227771c8f611f03639f0eeb169428761d9504ab5/label/label.go0000644000175000017500000000237112524212370022646 0ustar tianontianon// +build !selinux !linux package label // InitLabels returns the process label and file labels to be used within // the container. A list of options can be passed into this function to alter // the labels. func InitLabels(options []string) (string, string, error) { return "", "", nil } func GenLabels(options string) (string, string, error) { return "", "", nil } func FormatMountLabel(src string, mountLabel string) string { return src } func SetProcessLabel(processLabel string) error { return nil } func SetFileLabel(path string, fileLabel string) error { return nil } func SetFileCreateLabel(fileLabel string) error { return nil } func Relabel(path string, fileLabel string, relabel string) error { return nil } func GetPidLabel(pid int) (string, error) { return "", nil } func Init() { } func ReserveLabel(label string) error { return nil } func UnreserveLabel(label string) error { return nil } // DupSecOpt takes an process label and returns security options that // can be used to set duplicate labels on future container processes func DupSecOpt(src string) []string { return nil } // DisableSecOpt returns a security opt that can disable labeling // support for future container processes func DisableSecOpt() []string { return nil } 
libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/label/label_selinux.go0000644000175000017500000001114012524212370024407 0ustar tianontianon// +build selinux,linux package label import ( "fmt" "strings" "github.com/docker/libcontainer/selinux" ) // InitLabels returns the process label and file labels to be used within // the container. A list of options can be passed into this function to alter // the labels. The labels returned will include a random MCS String, that is // guaranteed to be unique. func InitLabels(options []string) (string, string, error) { if !selinux.SelinuxEnabled() { return "", "", nil } processLabel, mountLabel := selinux.GetLxcContexts() if processLabel != "" { pcon := selinux.NewContext(processLabel) mcon := selinux.NewContext(mountLabel) for _, opt := range options { if opt == "disable" { return "", "", nil } if i := strings.Index(opt, ":"); i == -1 { return "", "", fmt.Errorf("Bad SELinux Option") } con := strings.SplitN(opt, ":", 2) pcon[con[0]] = con[1] if con[0] == "level" || con[0] == "user" { mcon[con[0]] = con[1] } } processLabel = pcon.Get() mountLabel = mcon.Get() } return processLabel, mountLabel, nil } // DEPRECATED: The GenLabels function is only to be used during the transition to the official API. func GenLabels(options string) (string, string, error) { return InitLabels(strings.Fields(options)) } // FormatMountLabel returns a string to be used by the mount command. // The format of this string will be used to alter the labeling of the mountpoint. // The string returned is suitable to be used as the options field of the mount command. // If you need to have additional mount point options, you can pass them in as // the first parameter. Second parameter is the label that you wish to apply // to all content in the mount point. 
func FormatMountLabel(src, mountLabel string) string { if mountLabel != "" { switch src { case "": src = fmt.Sprintf("context=%q", mountLabel) default: src = fmt.Sprintf("%s,context=%q", src, mountLabel) } } return src } // SetProcessLabel takes a process label and tells the kernel to assign the // label to the next program executed by the current process. func SetProcessLabel(processLabel string) error { if processLabel == "" { return nil } return selinux.Setexeccon(processLabel) } // GetProcessLabel returns the process label that the kernel will assign // to the next program executed by the current process. If "" is returned // this indicates that the default labeling will happen for the process. func GetProcessLabel() (string, error) { return selinux.Getexeccon() } // SetFileLabel modifies the "path" label to the specified file label func SetFileLabel(path string, fileLabel string) error { if selinux.SelinuxEnabled() && fileLabel != "" { return selinux.Setfilecon(path, fileLabel) } return nil } // Tell the kernel the label for all files to be created func SetFileCreateLabel(fileLabel string) error { if selinux.SelinuxEnabled() { return selinux.Setfscreatecon(fileLabel) } return nil } // Change the label of path to the filelabel string. If the relabel string // is "z", relabel will change the MCS label to s0. This will allow all // containers to share the content. If the relabel string is a "Z" then // the MCS label should continue to be used. SELinux will use this field // to make sure the content can not be shared by other containes. 
func Relabel(path string, fileLabel string, relabel string) error { if fileLabel == "" { return nil } if relabel == "z" { c := selinux.NewContext(fileLabel) c["level"] = "s0" fileLabel = c.Get() } return selinux.Chcon(path, fileLabel, true) } // GetPidLabel will return the label of the process running with the specified pid func GetPidLabel(pid int) (string, error) { return selinux.Getpidcon(pid) } // Init initialises the labeling system func Init() { selinux.SelinuxEnabled() } // ReserveLabel will record the fact that the MCS label has already been used. // This will prevent InitLabels from using the MCS label in a newly created // container func ReserveLabel(label string) error { selinux.ReserveLabel(label) return nil } // UnreserveLabel will remove the reservation of the MCS label. // This will allow InitLabels to use the MCS label in a newly created // containers func UnreserveLabel(label string) error { selinux.FreeLxcContexts(label) return nil } // DupSecOpt takes an process label and returns security options that // can be used to set duplicate labels on future container processes func DupSecOpt(src string) []string { return selinux.DupSecOpt(src) } // DisableSecOpt returns a security opt that can disable labeling // support for future container processes func DisableSecOpt() []string { return selinux.DisableSecOpt() } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/label/label_selinux_test.go0000644000175000017500000000412612524212370025454 0ustar tianontianon// +build selinux,linux package label import ( "strings" "testing" "github.com/docker/libcontainer/selinux" ) func TestInit(t *testing.T) { if selinux.SelinuxEnabled() { var testNull []string plabel, mlabel, err := InitLabels(testNull) if err != nil { t.Log("InitLabels Failed") t.Fatal(err) } testDisabled := []string{"disable"} plabel, mlabel, err = InitLabels(testDisabled) if err != nil { t.Log("InitLabels Disabled Failed") t.Fatal(err) } if plabel != "" { t.Log("InitLabels Disabled Failed") 
t.Fatal() } testUser := []string{"user:user_u", "role:user_r", "type:user_t", "level:s0:c1,c15"} plabel, mlabel, err = InitLabels(testUser) if err != nil { t.Log("InitLabels User Failed") t.Fatal(err) } if plabel != "user_u:user_r:user_t:s0:c1,c15" || mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15" { t.Log("InitLabels User Match Failed") t.Log(plabel, mlabel) t.Fatal(err) } testBadData := []string{"user", "role:user_r", "type:user_t", "level:s0:c1,c15"} plabel, mlabel, err = InitLabels(testBadData) if err == nil { t.Log("InitLabels Bad Failed") t.Fatal(err) } } } func TestDuplicateLabel(t *testing.T) { secopt := DupSecOpt("system_u:system_r:svirt_lxc_net_t:s0:c1,c2") t.Log(secopt) for _, opt := range secopt { con := strings.SplitN(opt, ":", 3) if len(con) != 3 || con[0] != "label" { t.Errorf("Invalid DupSecOpt return value") continue } if con[1] == "user" { if con[2] != "system_u" { t.Errorf("DupSecOpt Failed user incorrect") } continue } if con[1] == "role" { if con[2] != "system_r" { t.Errorf("DupSecOpt Failed role incorrect") } continue } if con[1] == "type" { if con[2] != "svirt_lxc_net_t" { t.Errorf("DupSecOpt Failed type incorrect") } continue } if con[1] == "level" { if con[2] != "s0:c1,c2" { t.Errorf("DupSecOpt Failed level incorrect") } continue } t.Errorf("DupSecOpt Failed invalid field %q", con[1]) } secopt = DisableSecOpt() if secopt[0] != "label:disable" { t.Errorf("DisableSecOpt Failed level incorrect") } } libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/ROADMAP.md0000644000175000017500000000236412524212370021570 0ustar tianontianon# libcontainer: what's next? This document is a high-level overview of where we want to take libcontainer next. It is a curated selection of planned improvements which are either important, difficult, or both. For a more complete view of planned and requested improvements, see [the Github issues](https://github.com/docker/libcontainer/issues). 
To suggest changes to the roadmap, including additions, please write the change as if it were already in effect, and make a pull request. ## Broader kernel support Our goal is to make libcontainer run everywhere, but currently libcontainer requires Linux version 3.8 or higher. If you’re deploying new machines for the purpose of running libcontainer, this is a fairly easy requirement to meet. However, if you’re adding libcontainer to an existing deployment, you may not have the flexibility to update and patch the kernel. ## Cross-architecture support Our goal is to make libcontainer run everywhere. Recently libcontainer has expanded from its initial support for x86_64 systems to include POWER (ppc64 little and big endian variants), IBM System z (s390x 64-bit), and ARM. We plan to continue expanding architecture support such that libcontainer containers can be created and used on more architectures. libcontainer-227771c8f611f03639f0eeb169428761d9504ab5/error_test.go0000644000175000017500000000103012524212370022667 0ustar tianontianonpackage libcontainer import "testing" func TestErrorCode(t *testing.T) { codes := map[ErrorCode]string{ IdInUse: "Id already in use", InvalidIdFormat: "Invalid format", ContainerPaused: "Container paused", ConfigInvalid: "Invalid configuration", SystemError: "System error", ContainerNotExists: "Container does not exist", } for code, expected := range codes { if actual := code.String(); actual != expected { t.Fatalf("expected string %q but received %q", expected, actual) } } }