diff --git a/api/v1/server.go b/api/v1/server.go index dc1647f..aef85d0 100644 --- a/api/v1/server.go +++ b/api/v1/server.go @@ -25,8 +25,8 @@ type server struct { supervisor *containerd.Supervisor } -func (s *server) HandleHTTP(w http.ResponseWriter, r *http.Request) { - s.r.HandleHTTP(w, r) +func (s *server) ServeHTTP(w http.ResponseWriter, r *http.Request) { + s.r.ServeHTTP(w, r) } func (s *server) containers(w http.ResponseWriter, r *http.Request) { diff --git a/containerd/daemon.go b/containerd/daemon.go index d212f8c..3d6b66c 100644 --- a/containerd/daemon.go +++ b/containerd/daemon.go @@ -26,11 +26,14 @@ var DaemonCommand = cli.Command{ }, }, Action: func(context *cli.Context) { + if err := daemon(context.String("state-dir"), 20, context.Int("buffer-size")); err != nil { + logrus.Fatal(err) + } }, } -func daemon(stateDir string, bufferSize int) error { - supervisor, err := container.NewSupervisor(stateDir) +func daemon(stateDir string, concurrency, bufferSize int) error { + supervisor, err := containerd.NewSupervisor(stateDir, concurrency) if err != nil { return err } @@ -55,9 +58,7 @@ func startSignalHandler(supervisor *containerd.Supervisor, bufferSize int) { logrus.WithField("error", err).Error("containerd: reaping child processes") } for _, e := range exits { - if err := supervisor.Process(e); err != nil { - logrus.WithField("error", err).Error("containerd: processing events") - } + supervisor.SendEvent(e) } } } @@ -79,7 +80,7 @@ func reap() (exits []*containerd.ExitEvent, err error) { if pid <= 0 { return exits, nil } - exits = append(exits, *conatinerd.ExitEvent{ + exits = append(exits, &containerd.ExitEvent{ Pid: pid, Status: utils.ExitStatus(ws), }) diff --git a/containerd/main.go b/containerd/main.go index c7ae003..bace1c2 100644 --- a/containerd/main.go +++ b/containerd/main.go @@ -18,12 +18,23 @@ func main() { app.Version = Version app.Usage = Usage app.Authors = []cli.Author{ - Name: "@crosbymichael", - Email: "crosbymichael@gmail.com", + { + Name: "@crosbymichael", + Email: "crosbymichael@gmail.com", + }, } app.Commands = []cli.Command{ DaemonCommand, } + app.Flags = []cli.Flag{ + cli.BoolFlag{Name: "debug", Usage: "enable debug output in the logs"}, + } + app.Before = func(context *cli.Context) error { + if context.GlobalBool("debug") { + logrus.SetLevel(logrus.DebugLevel) + } + return nil + } if err := app.Run(os.Args); err != nil { logrus.Fatal(err) } diff --git a/jobs.go b/jobs.go index 51e0b94..af445ca 100644 --- a/jobs.go +++ b/jobs.go @@ -4,5 +4,7 @@ type Job interface { } type CreateJob struct { - Err chan error + ID string + BundlePath string + Err chan error } diff --git a/runtime.go b/runtime.go index 1dafe8b..4ffc838 100644 --- a/runtime.go +++ b/runtime.go @@ -3,5 +3,4 @@ package containerd // runtime handles containers, containers handle their own actions. type Runtime interface { Create(id, bundlePath string) (Container, error) - Delete(id sting) error } diff --git a/runtime_linux.go b/runtime_linux.go index 80e415a..61e1ee7 100644 --- a/runtime_linux.go +++ b/runtime_linux.go @@ -13,19 +13,146 @@ import ( "syscall" "github.com/opencontainers/runc/libcontainer" - "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/specs" ) +const ( + RLIMIT_CPU = iota // CPU time in sec + RLIMIT_FSIZE // Maximum filesize + RLIMIT_DATA // max data size + RLIMIT_STACK // max stack size + RLIMIT_CORE // max core file size + RLIMIT_RSS // max resident set size + RLIMIT_NPROC // max number of processes + RLIMIT_NOFILE // max number of open files + RLIMIT_MEMLOCK // max locked-in-memory address space + RLIMIT_AS // address space limit + RLIMIT_LOCKS // maximum file locks held + RLIMIT_SIGPENDING // max number of pending signals + RLIMIT_MSGQUEUE // maximum bytes in POSIX mqueues + RLIMIT_NICE // max nice prio allowed to raise to + RLIMIT_RTPRIO // maximum realtime priority + RLIMIT_RTTIME // timeout for RT tasks in us +) + +var rlimitMap = map[string]int{ + "RLIMIT_CPU": RLIMIT_CPU, + "RLIMIT_FSIZE": RLIMIT_FSIZE, + "RLIMIT_DATA": RLIMIT_DATA, + "RLIMIT_STACK": RLIMIT_STACK, + "RLIMIT_CORE": RLIMIT_CORE, + "RLIMIT_RSS": RLIMIT_RSS, + "RLIMIT_NPROC": RLIMIT_NPROC, + "RLIMIT_NOFILE": RLIMIT_NOFILE, + "RLIMIT_MEMLOCK": RLIMIT_MEMLOCK, + "RLIMIT_AS": RLIMIT_AS, + "RLIMIT_LOCKS": RLIMIT_LOCKS, + "RLIMIT_SGPENDING": RLIMIT_SIGPENDING, + "RLIMIT_MSGQUEUE": RLIMIT_MSGQUEUE, + "RLIMIT_NICE": RLIMIT_NICE, + "RLIMIT_RTPRIO": RLIMIT_RTPRIO, + "RLIMIT_RTTIME": RLIMIT_RTTIME, +} + +func strToRlimit(key string) (int, error) { + rl, ok := rlimitMap[key] + if !ok { + return 0, fmt.Errorf("Wrong rlimit value: %s", key) + } + return rl, nil +} + +const wildcard = -1 + +var allowedDevices = []*configs.Device{ + // allow mknod for any device + { + Type: 'c', + Major: wildcard, + Minor: wildcard, + Permissions: "m", + }, + { + Type: 'b', + Major: wildcard, + Minor: wildcard, + Permissions: "m", + }, + { + Path: "/dev/console", + Type: 'c', + Major: 5, + Minor: 1, + Permissions: "rwm", + }, + { + Path: "/dev/tty0", + Type: 'c', + Major: 4, + Minor: 0, + Permissions: "rwm", + }, + { + Path: "/dev/tty1", + Type: 'c', + Major: 4, + Minor: 1, + Permissions: "rwm", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Path: "", + Type: 'c', + Major: 136, + Minor: wildcard, + Permissions: "rwm", + }, + { + Path: "", + Type: 'c', + Major: 5, + Minor: 2, + Permissions: "rwm", + }, + // tuntap + { + Path: "", + Type: 'c', + Major: 10, + Minor: 200, + Permissions: "rwm", + }, +} + +var namespaceMapping = map[specs.NamespaceType]configs.NamespaceType{ + specs.PIDNamespace: configs.NEWPID, + specs.NetworkNamespace: configs.NEWNET, + specs.MountNamespace: configs.NEWNS, + specs.UserNamespace: configs.NEWUSER, + specs.IPCNamespace: configs.NEWIPC, + specs.UTSNamespace: configs.NEWUTS, +} + +var mountPropagationMapping = map[string]int{ + "rprivate": syscall.MS_PRIVATE | syscall.MS_REC, + "private": syscall.MS_PRIVATE, + "rslave": syscall.MS_SLAVE | syscall.MS_REC, + "slave": syscall.MS_SLAVE, + "rshared": syscall.MS_SHARED | syscall.MS_REC, + "shared": syscall.MS_SHARED, + "": syscall.MS_PRIVATE | syscall.MS_REC, +} + func init() { if len(os.Args) > 1 && os.Args[1] == "init" { runtime.GOMAXPROCS(1) runtime.LockOSThread() factory, _ := libcontainer.New("") if err := factory.StartInitialization(); err != nil { - fatal(err) + fmt.Fprint(os.Stderr, err) + os.Exit(1) } panic("--this line should have never been executed, congratulations--") } @@ -48,16 +175,17 @@ func (c *libcontainerContainer) Delete() error { return c.c.Destroy() } -func NewLibcontainerRuntime(stateDir string) (Runtime, error) { - f, err := libcontainer.New(abs, libcontainer.Cgroupfs, func(l *libcontainer.LinuxFactory) error { +func NewRuntime(stateDir string) (Runtime, error) { + f, err := libcontainer.New(stateDir, libcontainer.Cgroupfs, func(l *libcontainer.LinuxFactory) error { //l.CriuPath = context.GlobalString("criu") return nil }) - r := &libcontainerRuntime{ - factory: f, - containers: make(map[string]libcontainer.Container), + if err != nil { + return nil, err } - return r, nil + return &libcontainerRuntime{ + factory: f, + }, nil } @@ -66,14 +194,14 @@ type libcontainerRuntime struct { } func (r *libcontainerRuntime) Create(id, bundlePath string) (Container, error) { - spec, rspec, err := loadSpec( + spec, rspec, err := r.loadSpec( filepath.Join(bundlePath, "config.json"), filepath.Join(bundlePath, "runtime.json"), ) if err != nil { return nil, err } - config, err := r.createLibcontainerConfig(id, spec, rspec) + config, err := r.createLibcontainerConfig(id, bundlePath, spec, rspec) if err != nil { return nil, err } @@ -155,8 +283,6 @@ func (r *libcontainerRuntime) createLibcontainerConfig(cgroupName, bundlePath st Readonlyfs: spec.Root.Readonly, Hostname: spec.Hostname, } - - exists := false for _, ns := range rspec.Linux.Namespaces { t, exists := namespaceMapping[ns.Type] if !exists { @@ -239,14 +365,10 @@ func (r *libcontainerRuntime) createLibcontainerMount(cwd, dest string, m specs. } } -func (r *libcontainerRuntime) createCgroupConfig(name string, spec *specs.LinuxRuntimeSpec, devices []*configs.Device) (*configs.Cgroup, error) { - myCgroupPath, err := cgroups.GetThisCgroupDir("devices") - if err != nil { - return nil, err - } +func (rt *libcontainerRuntime) createCgroupConfig(name string, spec *specs.LinuxRuntimeSpec, devices []*configs.Device) (*configs.Cgroup, error) { c := &configs.Cgroup{ Name: name, - Parent: myCgroupPath, + Parent: "/containerd", AllowedDevices: append(devices, allowedDevices...), } r := spec.Linux.Resources diff --git a/supervisor.go b/supervisor.go index 6002d27..5c40dc9 100644 --- a/supervisor.go +++ b/supervisor.go @@ -8,18 +8,20 @@ import ( ) // NewSupervisor returns an initialized Process supervisor. -func NewSupervisor(stateDir string, concurrency int, runtime Runtime) (*Supervisor, error) { +func NewSupervisor(stateDir string, concurrency int) (*Supervisor, error) { if err := os.MkdirAll(stateDir, 0755); err != nil { return nil, err } - s := &Supervisor{ - stateDir: stateDir, - processes: make(map[int]Process), - runtime: runtime, - jobs: make(chan Job, 1024), + runtime, err := NewRuntime(stateDir) + if err != nil { + return nil, err } - s.state = &runningState{ - s: s, + s := &Supervisor{ + stateDir: stateDir, + processes: make(map[int]Container), + containers: make(map[string]Container), + runtime: runtime, + jobs: make(chan Job, 1024), } for i := 0; i < concurrency; i++ { s.workerGroup.Add(1) @@ -94,7 +96,7 @@ func (s *Supervisor) worker(id int) { for job := range s.jobs { switch j := job.(type) { case *CreateJob: - container, err := r.s.runtime.Create(j.ID, j.BundlePath) + container, err := s.runtime.Create(j.ID, j.BundlePath) if err != nil { j.Err <- err }