Vendor in container storage

This should add quota support to cri-o

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
This commit is contained in:
Daniel J Walsh 2017-09-26 19:58:51 +00:00
parent e838611fdd
commit 29bd1c79dd
52 changed files with 2751 additions and 1881 deletions

View file

@ -33,22 +33,22 @@ import (
"path/filepath"
"strings"
"sync"
"syscall"
"github.com/sirupsen/logrus"
"github.com/vbatts/tar-split/tar/storage"
"time"
"github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chrootarchive"
"github.com/containers/storage/pkg/directory"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/locker"
mountpk "github.com/containers/storage/pkg/mount"
"github.com/containers/storage/pkg/stringid"
"github.com/containers/storage/pkg/system"
rsystem "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vbatts/tar-split/tar/storage"
"golang.org/x/sys/unix"
)
var (
@ -75,6 +75,8 @@ type Driver struct {
ctr *graphdriver.RefCounter
pathCacheLock sync.Mutex
pathCache map[string]string
naiveDiff graphdriver.DiffDriver
locker *locker.Locker
}
// Init returns a new AUFS driver.
@ -84,6 +86,7 @@ func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
// Try to load the aufs kernel module
if err := supportsAufs(); err != nil {
return nil, errors.Wrap(graphdriver.ErrNotSupported, "kernel does not support aufs")
}
fsMagic, err := graphdriver.GetFSMagic(root)
@ -112,6 +115,7 @@ func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
gidMaps: gidMaps,
pathCache: make(map[string]string),
ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicAufs)),
locker: locker.New(),
}
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
@ -138,6 +142,32 @@ func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
return nil, err
}
}
logger := logrus.WithFields(logrus.Fields{
"module": "graphdriver",
"driver": "aufs",
})
for _, path := range []string{"mnt", "diff"} {
p := filepath.Join(root, path)
entries, err := ioutil.ReadDir(p)
if err != nil {
logger.WithError(err).WithField("dir", p).Error("error reading dir entries")
continue
}
for _, entry := range entries {
if !entry.IsDir() {
continue
}
if strings.HasSuffix(entry.Name(), "-removing") {
logger.WithField("dir", entry.Name()).Debug("Cleaning up stale layer dir")
if err := system.EnsureRemoveAll(filepath.Join(p, entry.Name())); err != nil {
logger.WithField("dir", entry.Name()).WithError(err).Error("Error removing stale layer dir")
}
}
}
}
a.naiveDiff = graphdriver.NewNaiveDiffDriver(a, uidMaps, gidMaps)
return a, nil
}
@ -201,17 +231,22 @@ func (a *Driver) Exists(id string) bool {
return true
}
// AdditionalImageStores returns additional image stores supported by the driver
func (a *Driver) AdditionalImageStores() []string {
return nil
}
// CreateReadWrite creates a layer that is writable for use as a container
// file system.
func (a *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
return a.Create(id, parent, mountLabel, storageOpt)
func (a *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
return a.Create(id, parent, opts)
}
// Create three folders for each id
// mnt, layers, and diff
func (a *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) error {
func (a *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
if len(storageOpt) != 0 {
if opts != nil && len(opts.StorageOpt) != 0 {
return fmt.Errorf("--storage-opt is not supported for aufs")
}
@ -226,7 +261,7 @@ func (a *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
defer f.Close()
if parent != "" {
ids, err := getParentIds(a.rootPath(), parent)
ids, err := getParentIDs(a.rootPath(), parent)
if err != nil {
return err
}
@ -269,35 +304,68 @@ func (a *Driver) createDirsFor(id string) error {
// Remove will unmount and remove the given id.
func (a *Driver) Remove(id string) error {
a.locker.Lock(id)
defer a.locker.Unlock(id)
a.pathCacheLock.Lock()
mountpoint, exists := a.pathCache[id]
a.pathCacheLock.Unlock()
if !exists {
mountpoint = a.getMountpoint(id)
}
if err := a.unmount(mountpoint); err != nil {
// no need to return here, we can still try to remove since the `Rename` will fail below if still mounted
logrus.Debugf("aufs: error while unmounting %s: %v", mountpoint, err)
}
// Atomically remove each directory in turn by first moving it out of the
// way (so that container runtimes don't find it anymore) before doing removal of
// the whole tree.
tmpMntPath := path.Join(a.mntPath(), fmt.Sprintf("%s-removing", id))
if err := os.Rename(mountpoint, tmpMntPath); err != nil && !os.IsNotExist(err) {
return err
}
defer os.RemoveAll(tmpMntPath)
logger := logrus.WithFields(logrus.Fields{
"module": "graphdriver",
"driver": "aufs",
"layer": id,
})
tmpDiffpath := path.Join(a.diffPath(), fmt.Sprintf("%s-removing", id))
if err := os.Rename(a.getDiffPath(id), tmpDiffpath); err != nil && !os.IsNotExist(err) {
return err
var retries int
for {
mounted, err := a.mounted(mountpoint)
if err != nil {
if os.IsNotExist(err) {
break
}
return err
}
if !mounted {
break
}
err = a.unmount(mountpoint)
if err == nil {
break
}
if err != unix.EBUSY {
return errors.Wrapf(err, "aufs: unmount error: %s", mountpoint)
}
if retries >= 5 {
return errors.Wrapf(err, "aufs: unmount error after retries: %s", mountpoint)
}
// If unmount returns EBUSY, it could be a transient error. Sleep and retry.
retries++
logger.Warnf("unmount failed due to EBUSY: retry count: %d", retries)
time.Sleep(100 * time.Millisecond)
}
defer os.RemoveAll(tmpDiffpath)
// Remove the layers file for the id
if err := os.Remove(path.Join(a.rootPath(), "layers", id)); err != nil && !os.IsNotExist(err) {
return err
return errors.Wrapf(err, "error removing layers dir for %s", id)
}
if err := atomicRemove(a.getDiffPath(id)); err != nil {
return errors.Wrapf(err, "could not remove diff path for id %s", id)
}
// Atomically remove each directory in turn by first moving it out of the
// way (so that container runtime doesn't find it anymore) before doing removal of
// the whole tree.
if err := atomicRemove(mountpoint); err != nil {
if errors.Cause(err) == unix.EBUSY {
logger.WithField("dir", mountpoint).WithError(err).Warn("error performing atomic remove due to EBUSY")
}
return errors.Wrapf(err, "could not remove mountpoint for id %s", id)
}
a.pathCacheLock.Lock()
@ -306,9 +374,29 @@ func (a *Driver) Remove(id string) error {
return nil
}
func atomicRemove(source string) error {
target := source + "-removing"
err := os.Rename(source, target)
switch {
case err == nil, os.IsNotExist(err):
case os.IsExist(err):
// Got error saying the target dir already exists, maybe the source doesn't exist due to a previous (failed) remove
if _, e := os.Stat(source); !os.IsNotExist(e) {
return errors.Wrapf(err, "target rename dir '%s' exists but should not, this needs to be manually cleaned up")
}
default:
return errors.Wrapf(err, "error preparing atomic delete")
}
return system.EnsureRemoveAll(target)
}
// Get returns the rootfs path for the id.
// This will mount the dir at it's given path
// This will mount the dir at its given path
func (a *Driver) Get(id, mountLabel string) (string, error) {
a.locker.Lock(id)
defer a.locker.Unlock(id)
parents, err := a.getParentLayerPaths(id)
if err != nil && !os.IsNotExist(err) {
return "", err
@ -344,6 +432,8 @@ func (a *Driver) Get(id, mountLabel string) (string, error) {
// Put unmounts and updates list of active mounts.
func (a *Driver) Put(id string) error {
a.locker.Lock(id)
defer a.locker.Unlock(id)
a.pathCacheLock.Lock()
m, exists := a.pathCache[id]
if !exists {
@ -362,9 +452,22 @@ func (a *Driver) Put(id string) error {
return err
}
// isParent returns if the passed in parent is the direct parent of the passed in layer
func (a *Driver) isParent(id, parent string) bool {
parents, _ := getParentIDs(a.rootPath(), id)
if parent == "" && len(parents) > 0 {
return false
}
return !(len(parents) > 0 && parent != parents[0])
}
// Diff produces an archive of the changes between the specified
// layer and its parent layer which may be "".
func (a *Driver) Diff(id, parent string) (io.ReadCloser, error) {
if !a.isParent(id, parent) {
return a.naiveDiff.Diff(id, parent)
}
// AUFS doesn't need the parent layer to produce a diff.
return archive.TarWithOptions(path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
Compression: archive.Uncompressed,
@ -374,12 +477,6 @@ func (a *Driver) Diff(id, parent string) (io.ReadCloser, error) {
})
}
// AdditionalImageStores returns additional image stores supported by the driver
func (a *Driver) AdditionalImageStores() []string {
var imageStores []string
return imageStores
}
type fileGetNilCloser struct {
storage.FileGetter
}
@ -406,6 +503,9 @@ func (a *Driver) applyDiff(id string, diff io.Reader) error {
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
func (a *Driver) DiffSize(id, parent string) (size int64, err error) {
if !a.isParent(id, parent) {
return a.naiveDiff.DiffSize(id, parent)
}
// AUFS doesn't need the parent layer to calculate the diff size.
return directory.Size(path.Join(a.rootPath(), "diff", id))
}
@ -414,7 +514,11 @@ func (a *Driver) DiffSize(id, parent string) (size int64, err error) {
// layer with the specified id and parent, returning the size of the
// new layer in bytes.
func (a *Driver) ApplyDiff(id, parent string, diff io.Reader) (size int64, err error) {
// AUFS doesn't need the parent id to apply the diff.
if !a.isParent(id, parent) {
return a.naiveDiff.ApplyDiff(id, parent, diff)
}
// AUFS doesn't need the parent id to apply the diff if it is the direct parent.
if err = a.applyDiff(id, diff); err != nil {
return
}
@ -425,6 +529,10 @@ func (a *Driver) ApplyDiff(id, parent string, diff io.Reader) (size int64, err e
// Changes produces a list of changes between the specified layer
// and its parent layer. If parent is "", then all changes will be ADD changes.
func (a *Driver) Changes(id, parent string) ([]archive.Change, error) {
if !a.isParent(id, parent) {
return a.naiveDiff.Changes(id, parent)
}
// AUFS doesn't have snapshots, so we need to get changes from all parent
// layers.
layers, err := a.getParentLayerPaths(id)
@ -435,7 +543,7 @@ func (a *Driver) Changes(id, parent string) ([]archive.Change, error) {
}
func (a *Driver) getParentLayerPaths(id string) ([]string, error) {
parentIds, err := getParentIds(a.rootPath(), id)
parentIds, err := getParentIDs(a.rootPath(), id)
if err != nil {
return nil, err
}
@ -500,7 +608,7 @@ func (a *Driver) Cleanup() error {
for _, m := range dirs {
if err := a.unmount(m); err != nil {
logrus.Debugf("aufs error unmounting %s: %s", stringid.TruncateID(m), err)
logrus.Debugf("aufs error unmounting %s: %s", m, err)
}
}
return mountpk.Unmount(a.root)
@ -518,46 +626,35 @@ func (a *Driver) aufsMount(ro []string, rw, target, mountLabel string) (err erro
offset := 54
if useDirperm() {
offset += len("dirperm1")
offset += len(",dirperm1")
}
b := make([]byte, syscall.Getpagesize()-len(mountLabel)-offset) // room for xino & mountLabel
b := make([]byte, unix.Getpagesize()-len(mountLabel)-offset) // room for xino & mountLabel
bp := copy(b, fmt.Sprintf("br:%s=rw", rw))
firstMount := true
i := 0
for {
for ; i < len(ro); i++ {
layer := fmt.Sprintf(":%s=ro+wh", ro[i])
if firstMount {
if bp+len(layer) > len(b) {
break
}
bp += copy(b[bp:], layer)
} else {
data := label.FormatMountLabel(fmt.Sprintf("append%s", layer), mountLabel)
if err = mount("none", target, "aufs", syscall.MS_REMOUNT, data); err != nil {
return
}
}
}
if firstMount {
opts := "dio,xino=/dev/shm/aufs.xino"
if useDirperm() {
opts += ",dirperm1"
}
data := label.FormatMountLabel(fmt.Sprintf("%s,%s", string(b[:bp]), opts), mountLabel)
if err = mount("none", target, "aufs", 0, data); err != nil {
return
}
firstMount = false
}
if i == len(ro) {
index := 0
for ; index < len(ro); index++ {
layer := fmt.Sprintf(":%s=ro+wh", ro[index])
if bp+len(layer) > len(b) {
break
}
bp += copy(b[bp:], layer)
}
opts := "dio,xino=/dev/shm/aufs.xino"
if useDirperm() {
opts += ",dirperm1"
}
data := label.FormatMountLabel(fmt.Sprintf("%s,%s", string(b[:bp]), opts), mountLabel)
if err = mount("none", target, "aufs", 0, data); err != nil {
return
}
for ; index < len(ro); index++ {
layer := fmt.Sprintf(":%s=ro+wh", ro[index])
data := label.FormatMountLabel(fmt.Sprintf("append%s", layer), mountLabel)
if err = mount("none", target, "aufs", unix.MS_REMOUNT, data); err != nil {
return
}
}
return

View file

@ -29,7 +29,7 @@ func loadIds(root string) ([]string, error) {
//
// If there are no lines in the file then the id has no parent
// and an empty slice is returned.
func getParentIds(root, id string) ([]string, error) {
func getParentIDs(root, id string) ([]string, error) {
f, err := os.Open(path.Join(root, "layers", id))
if err != nil {
return nil, err

View file

@ -4,9 +4,9 @@ package aufs
import (
"os/exec"
"syscall"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// Unmount the target specified.
@ -14,7 +14,7 @@ func Unmount(target string) error {
if err := exec.Command("auplink", target, "flush").Run(); err != nil {
logrus.Warnf("Couldn't run auplink before unmount %s: %s", target, err)
}
if err := syscall.Unmount(target, 0); err != nil {
if err := unix.Unmount(target, 0); err != nil {
return err
}
return nil

View file

@ -1,7 +1,7 @@
package aufs
import "syscall"
import "golang.org/x/sys/unix"
func mount(source string, target string, fstype string, flags uintptr, data string) error {
return syscall.Mount(source, target, fstype, flags, data)
return unix.Mount(source, target, fstype, flags, data)
}

View file

@ -2,7 +2,7 @@
package aufs
import "github.com/pkg/errors"
import "errors"
// MsRemount declared to specify a non-linux system mount.
const MsRemount = 0

View file

@ -16,31 +16,32 @@ import "C"
import (
"fmt"
"io/ioutil"
"math"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"syscall"
"sync"
"unsafe"
"github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/mount"
"github.com/containers/storage/pkg/parsers"
"github.com/containers/storage/pkg/system"
"github.com/docker/go-units"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func init() {
graphdriver.Register("btrfs", Init)
}
var (
quotaEnabled = false
userDiskQuota = false
)
type btrfsOptions struct {
minSpace uint64
size uint64
@ -71,18 +72,11 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
return nil, err
}
opt, err := parseOptions(options)
opt, userDiskQuota, err := parseOptions(options)
if err != nil {
return nil, err
}
if userDiskQuota {
if err := subvolEnableQuota(home); err != nil {
return nil, err
}
quotaEnabled = true
}
driver := &Driver{
home: home,
uidMaps: uidMaps,
@ -90,39 +84,48 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
options: opt,
}
if userDiskQuota {
if err := driver.subvolEnableQuota(); err != nil {
return nil, err
}
}
return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil
}
func parseOptions(opt []string) (btrfsOptions, error) {
func parseOptions(opt []string) (btrfsOptions, bool, error) {
var options btrfsOptions
userDiskQuota := false
for _, option := range opt {
key, val, err := parsers.ParseKeyValueOpt(option)
if err != nil {
return options, err
return options, userDiskQuota, err
}
key = strings.ToLower(key)
switch key {
case "btrfs.min_space":
minSpace, err := units.RAMInBytes(val)
if err != nil {
return options, err
return options, userDiskQuota, err
}
userDiskQuota = true
options.minSpace = uint64(minSpace)
default:
return options, fmt.Errorf("Unknown option %s", key)
return options, userDiskQuota, fmt.Errorf("Unknown option %s", key)
}
}
return options, nil
return options, userDiskQuota, nil
}
// Driver contains information about the filesystem mounted.
type Driver struct {
//root of the file system
home string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
options btrfsOptions
home string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
options btrfsOptions
quotaEnabled bool
once sync.Once
}
// String prints the name of the driver (btrfs).
@ -151,10 +154,8 @@ func (d *Driver) Metadata(id string) (map[string]string, error) {
// Cleanup unmounts the home directory.
func (d *Driver) Cleanup() error {
if quotaEnabled {
if err := subvolDisableQuota(d.home); err != nil {
return err
}
if err := d.subvolDisableQuota(); err != nil {
return err
}
return mount.Unmount(d.home)
@ -197,7 +198,7 @@ func subvolCreate(path, name string) error {
args.name[i] = C.char(c)
}
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE,
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error())
@ -225,7 +226,7 @@ func subvolSnapshot(src, dest, name string) error {
C.set_name_btrfs_ioctl_vol_args_v2(&args, cs)
C.free(unsafe.Pointer(cs))
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2,
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error())
@ -234,8 +235,8 @@ func subvolSnapshot(src, dest, name string) error {
}
func isSubvolume(p string) (bool, error) {
var bufStat syscall.Stat_t
if err := syscall.Lstat(p, &bufStat); err != nil {
var bufStat unix.Stat_t
if err := unix.Lstat(p, &bufStat); err != nil {
return false, err
}
@ -243,7 +244,7 @@ func isSubvolume(p string) (bool, error) {
return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil
}
func subvolDelete(dirpath, name string) error {
func subvolDelete(dirpath, name string, quotaEnabled bool) error {
dir, err := openDir(dirpath)
if err != nil {
return err
@ -271,7 +272,7 @@ func subvolDelete(dirpath, name string) error {
return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err)
}
if sv {
if err := subvolDelete(path.Dir(p), f.Name()); err != nil {
if err := subvolDelete(path.Dir(p), f.Name(), quotaEnabled); err != nil {
return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err)
}
}
@ -282,12 +283,27 @@ func subvolDelete(dirpath, name string) error {
return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err)
}
if quotaEnabled {
if qgroupid, err := subvolLookupQgroup(fullPath); err == nil {
var args C.struct_btrfs_ioctl_qgroup_create_args
args.qgroupid = C.__u64(qgroupid)
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_CREATE,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
logrus.Errorf("Failed to delete btrfs qgroup %v for %s: %v", qgroupid, fullPath, errno.Error())
}
} else {
logrus.Errorf("Failed to lookup btrfs qgroup for %s: %v", fullPath, err.Error())
}
}
// all subvolumes have been removed
// now remove the one originally passed in
for i, c := range []byte(name) {
args.name[i] = C.char(c)
}
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY,
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error())
@ -295,8 +311,27 @@ func subvolDelete(dirpath, name string) error {
return nil
}
func subvolEnableQuota(path string) error {
dir, err := openDir(path)
func (d *Driver) updateQuotaStatus() {
d.once.Do(func() {
if !d.quotaEnabled {
// In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed
if err := subvolQgroupStatus(d.home); err != nil {
// quota is still not enabled
return
}
d.quotaEnabled = true
}
})
}
func (d *Driver) subvolEnableQuota() error {
d.updateQuotaStatus()
if d.quotaEnabled {
return nil
}
dir, err := openDir(d.home)
if err != nil {
return err
}
@ -304,17 +339,25 @@ func subvolEnableQuota(path string) error {
var args C.struct_btrfs_ioctl_quota_ctl_args
args.cmd = C.BTRFS_QUOTA_CTL_ENABLE
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL,
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error())
}
d.quotaEnabled = true
return nil
}
func subvolDisableQuota(path string) error {
dir, err := openDir(path)
func (d *Driver) subvolDisableQuota() error {
d.updateQuotaStatus()
if !d.quotaEnabled {
return nil
}
dir, err := openDir(d.home)
if err != nil {
return err
}
@ -322,24 +365,32 @@ func subvolDisableQuota(path string) error {
var args C.struct_btrfs_ioctl_quota_ctl_args
args.cmd = C.BTRFS_QUOTA_CTL_DISABLE
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL,
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("Failed to disable btrfs quota for %s: %v", dir, errno.Error())
}
d.quotaEnabled = false
return nil
}
func subvolRescanQuota(path string) error {
dir, err := openDir(path)
func (d *Driver) subvolRescanQuota() error {
d.updateQuotaStatus()
if !d.quotaEnabled {
return nil
}
dir, err := openDir(d.home)
if err != nil {
return err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_quota_rescan_args
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT,
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error())
@ -358,7 +409,7 @@ func subvolLimitQgroup(path string, size uint64) error {
var args C.struct_btrfs_ioctl_qgroup_limit_args
args.lim.max_referenced = C.__u64(size)
args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT,
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error())
@ -367,6 +418,60 @@ func subvolLimitQgroup(path string, size uint64) error {
return nil
}
// subvolQgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path
// with search key of BTRFS_QGROUP_STATUS_KEY.
// In case qgroup is enabled, the retuned key type will match BTRFS_QGROUP_STATUS_KEY.
// For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035
func subvolQgroupStatus(path string) error {
dir, err := openDir(path)
if err != nil {
return err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_search_args
args.key.tree_id = C.BTRFS_QUOTA_TREE_OBJECTID
args.key.min_type = C.BTRFS_QGROUP_STATUS_KEY
args.key.max_type = C.BTRFS_QGROUP_STATUS_KEY
args.key.max_objectid = C.__u64(math.MaxUint64)
args.key.max_offset = C.__u64(math.MaxUint64)
args.key.max_transid = C.__u64(math.MaxUint64)
args.key.nr_items = 4096
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_TREE_SEARCH,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("Failed to search qgroup for %s: %v", path, errno.Error())
}
sh := (*C.struct_btrfs_ioctl_search_header)(unsafe.Pointer(&args.buf))
if sh._type != C.BTRFS_QGROUP_STATUS_KEY {
return fmt.Errorf("Invalid qgroup search header type for %s: %v", path, sh._type)
}
return nil
}
func subvolLookupQgroup(path string) (uint64, error) {
dir, err := openDir(path)
if err != nil {
return 0, err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_ino_lookup_args
args.objectid = C.BTRFS_FIRST_FREE_OBJECTID
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return 0, fmt.Errorf("Failed to lookup qgroup for %s: %v", dir, errno.Error())
}
if args.treeid == 0 {
return 0, fmt.Errorf("Invalid qgroup id for %s: 0", dir)
}
return uint64(args.treeid), nil
}
func (d *Driver) subvolumesDir() string {
return path.Join(d.home, "subvolumes")
}
@ -375,14 +480,23 @@ func (d *Driver) subvolumesDirID(id string) string {
return path.Join(d.subvolumesDir(), id)
}
func (d *Driver) quotasDir() string {
return path.Join(d.home, "quotas")
}
func (d *Driver) quotasDirID(id string) string {
return path.Join(d.quotasDir(), id)
}
// CreateReadWrite creates a layer that is writable for use as a container
// file system.
func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
return d.Create(id, parent, mountLabel, storageOpt)
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
return d.Create(id, parent, opts)
}
// Create the filesystem with given id.
func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) error {
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
quotas := path.Join(d.home, "quotas")
subvolumes := path.Join(d.home, "subvolumes")
rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
if err != nil {
@ -409,14 +523,26 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
}
}
var storageOpt map[string]string
if opts != nil {
storageOpt = opts.StorageOpt
}
if _, ok := storageOpt["size"]; ok {
driver := &Driver{}
if err := d.parseStorageOpt(storageOpt, driver); err != nil {
return err
}
if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil {
return err
}
if err := idtools.MkdirAllAs(quotas, 0700, rootUID, rootGID); err != nil {
return err
}
if err := ioutil.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0644); err != nil {
return err
}
}
// if we have a remapped root (user namespaces enabled), change the created snapshot
@ -427,6 +553,11 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
}
}
mountLabel := ""
if opts != nil {
mountLabel = opts.MountLabel
}
return label.Relabel(path.Join(subvolumes, id), mountLabel, false)
}
@ -459,11 +590,8 @@ func (d *Driver) setStorageSize(dir string, driver *Driver) error {
return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace)))
}
if !quotaEnabled {
if err := subvolEnableQuota(d.home); err != nil {
return err
}
quotaEnabled = true
if err := d.subvolEnableQuota(); err != nil {
return err
}
if err := subvolLimitQgroup(dir, driver.options.size); err != nil {
@ -479,13 +607,25 @@ func (d *Driver) Remove(id string) error {
if _, err := os.Stat(dir); err != nil {
return err
}
if err := subvolDelete(d.subvolumesDir(), id); err != nil {
quotasDir := d.quotasDirID(id)
if _, err := os.Stat(quotasDir); err == nil {
if err := os.Remove(quotasDir); err != nil {
return err
}
} else if !os.IsNotExist(err) {
return err
}
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
// Call updateQuotaStatus() to invoke status update
d.updateQuotaStatus()
if err := subvolDelete(d.subvolumesDir(), id, d.quotaEnabled); err != nil {
return err
}
if err := subvolRescanQuota(d.home); err != nil {
if err := system.EnsureRemoveAll(dir); err != nil {
return err
}
if err := d.subvolRescanQuota(); err != nil {
return err
}
return nil
@ -503,6 +643,17 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
return "", fmt.Errorf("%s: not a directory", dir)
}
if quota, err := ioutil.ReadFile(d.quotasDirID(id)); err == nil {
if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace {
if err := d.subvolEnableQuota(); err != nil {
return "", err
}
if err := subvolLimitQgroup(dir, size); err != nil {
return "", err
}
}
}
return dir, nil
}
@ -522,6 +673,5 @@ func (d *Driver) Exists(id string) bool {
// AdditionalImageStores returns additional image stores supported by the driver
func (d *Driver) AdditionalImageStores() []string {
var imageStores []string
return imageStores
return nil
}

View file

@ -22,30 +22,21 @@ func NewRefCounter(c Checker) *RefCounter {
}
}
// Increment increaes the ref count for the given id and returns the current count
// Increment increases the ref count for the given id and returns the current count
func (c *RefCounter) Increment(path string) int {
c.mu.Lock()
m := c.counts[path]
if m == nil {
m = &minfo{}
c.counts[path] = m
}
// if we are checking this path for the first time check to make sure
// if it was already mounted on the system and make sure we have a correct ref
// count if it is mounted as it is in use.
if !m.check {
m.check = true
if c.checker.IsMounted(path) {
m.count++
}
}
m.count++
c.mu.Unlock()
return m.count
return c.incdec(path, func(minfo *minfo) {
minfo.count++
})
}
// Decrement decreases the ref count for the given id and returns the current count
func (c *RefCounter) Decrement(path string) int {
return c.incdec(path, func(minfo *minfo) {
minfo.count--
})
}
func (c *RefCounter) incdec(path string, infoOp func(minfo *minfo)) int {
c.mu.Lock()
m := c.counts[path]
if m == nil {
@ -61,7 +52,8 @@ func (c *RefCounter) Decrement(path string) int {
m.count++
}
}
m.count--
infoOp(m)
count := m.count
c.mu.Unlock()
return m.count
return count
}

View file

@ -0,0 +1,236 @@
package devmapper
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"reflect"
"strings"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
type directLVMConfig struct {
Device string
ThinpPercent uint64
ThinpMetaPercent uint64
AutoExtendPercent uint64
AutoExtendThreshold uint64
}
var (
errThinpPercentMissing = errors.New("must set both `dm.thinp_percent` and `dm.thinp_metapercent` if either is specified")
errThinpPercentTooBig = errors.New("combined `dm.thinp_percent` and `dm.thinp_metapercent` must not be greater than 100")
errMissingSetupDevice = errors.New("must provide device path in `dm.setup_device` in order to configure direct-lvm")
)
func validateLVMConfig(cfg directLVMConfig) error {
if reflect.DeepEqual(cfg, directLVMConfig{}) {
return nil
}
if cfg.Device == "" {
return errMissingSetupDevice
}
if (cfg.ThinpPercent > 0 && cfg.ThinpMetaPercent == 0) || cfg.ThinpMetaPercent > 0 && cfg.ThinpPercent == 0 {
return errThinpPercentMissing
}
if cfg.ThinpPercent+cfg.ThinpMetaPercent > 100 {
return errThinpPercentTooBig
}
return nil
}
func checkDevAvailable(dev string) error {
lvmScan, err := exec.LookPath("lvmdiskscan")
if err != nil {
logrus.Debug("could not find lvmdiskscan")
return nil
}
out, err := exec.Command(lvmScan).CombinedOutput()
if err != nil {
logrus.WithError(err).Error(string(out))
return nil
}
if !bytes.Contains(out, []byte(dev)) {
return errors.Errorf("%s is not available for use with devicemapper", dev)
}
return nil
}
func checkDevInVG(dev string) error {
pvDisplay, err := exec.LookPath("pvdisplay")
if err != nil {
logrus.Debug("could not find pvdisplay")
return nil
}
out, err := exec.Command(pvDisplay, dev).CombinedOutput()
if err != nil {
logrus.WithError(err).Error(string(out))
return nil
}
scanner := bufio.NewScanner(bytes.NewReader(bytes.TrimSpace(out)))
for scanner.Scan() {
fields := strings.SplitAfter(strings.TrimSpace(scanner.Text()), "VG Name")
if len(fields) > 1 {
// got "VG Name" line"
vg := strings.TrimSpace(fields[1])
if len(vg) > 0 {
return errors.Errorf("%s is already part of a volume group %q: must remove this device from any volume group or provide a different device", dev, vg)
}
logrus.Error(fields)
break
}
}
return nil
}
func checkDevHasFS(dev string) error {
blkid, err := exec.LookPath("blkid")
if err != nil {
logrus.Debug("could not find blkid")
return nil
}
out, err := exec.Command(blkid, dev).CombinedOutput()
if err != nil {
logrus.WithError(err).Error(string(out))
return nil
}
fields := bytes.Fields(out)
for _, f := range fields {
kv := bytes.Split(f, []byte{'='})
if bytes.Equal(kv[0], []byte("TYPE")) {
v := bytes.Trim(kv[1], "\"")
if len(v) > 0 {
return errors.Errorf("%s has a filesystem already, use dm.directlvm_device_force=true if you want to wipe the device", dev)
}
return nil
}
}
return nil
}
func verifyBlockDevice(dev string, force bool) error {
if err := checkDevAvailable(dev); err != nil {
return err
}
if err := checkDevInVG(dev); err != nil {
return err
}
if force {
return nil
}
if err := checkDevHasFS(dev); err != nil {
return err
}
return nil
}
func readLVMConfig(root string) (directLVMConfig, error) {
var cfg directLVMConfig
p := filepath.Join(root, "setup-config.json")
b, err := ioutil.ReadFile(p)
if err != nil {
if os.IsNotExist(err) {
return cfg, nil
}
return cfg, errors.Wrap(err, "error reading existing setup config")
}
// check if this is just an empty file, no need to produce a json error later if so
if len(b) == 0 {
return cfg, nil
}
err = json.Unmarshal(b, &cfg)
return cfg, errors.Wrap(err, "error unmarshaling previous device setup config")
}
func writeLVMConfig(root string, cfg directLVMConfig) error {
p := filepath.Join(root, "setup-config.json")
b, err := json.Marshal(cfg)
if err != nil {
return errors.Wrap(err, "error marshalling direct lvm config")
}
err = ioutil.WriteFile(p, b, 0600)
return errors.Wrap(err, "error writing direct lvm config to file")
}
func setupDirectLVM(cfg directLVMConfig) error {
lvmProfileDir := "/etc/lvm/profile"
binaries := []string{"pvcreate", "vgcreate", "lvcreate", "lvconvert", "lvchange", "thin_check"}
for _, bin := range binaries {
if _, err := exec.LookPath(bin); err != nil {
return errors.Wrap(err, "error looking up command `"+bin+"` while setting up direct lvm")
}
}
err := os.MkdirAll(lvmProfileDir, 0755)
if err != nil {
return errors.Wrap(err, "error creating lvm profile directory")
}
if cfg.AutoExtendPercent == 0 {
cfg.AutoExtendPercent = 20
}
if cfg.AutoExtendThreshold == 0 {
cfg.AutoExtendThreshold = 80
}
if cfg.ThinpPercent == 0 {
cfg.ThinpPercent = 95
}
if cfg.ThinpMetaPercent == 0 {
cfg.ThinpMetaPercent = 1
}
out, err := exec.Command("pvcreate", "-f", cfg.Device).CombinedOutput()
if err != nil {
return errors.Wrap(err, string(out))
}
out, err = exec.Command("vgcreate", "storage", cfg.Device).CombinedOutput()
if err != nil {
return errors.Wrap(err, string(out))
}
out, err = exec.Command("lvcreate", "--wipesignatures", "y", "-n", "thinpool", "storage", "--extents", fmt.Sprintf("%d%%VG", cfg.ThinpPercent)).CombinedOutput()
if err != nil {
return errors.Wrap(err, string(out))
}
out, err = exec.Command("lvcreate", "--wipesignatures", "y", "-n", "thinpoolmeta", "storage", "--extents", fmt.Sprintf("%d%%VG", cfg.ThinpMetaPercent)).CombinedOutput()
if err != nil {
return errors.Wrap(err, string(out))
}
out, err = exec.Command("lvconvert", "-y", "--zero", "n", "-c", "512K", "--thinpool", "storage/thinpool", "--poolmetadata", "storage/thinpoolmeta").CombinedOutput()
if err != nil {
return errors.Wrap(err, string(out))
}
profile := fmt.Sprintf("activation{\nthin_pool_autoextend_threshold=%d\nthin_pool_autoextend_percent=%d\n}", cfg.AutoExtendThreshold, cfg.AutoExtendPercent)
err = ioutil.WriteFile(lvmProfileDir+"/storage-thinpool.profile", []byte(profile), 0600)
if err != nil {
return errors.Wrap(err, "error writing storage thinp autoextend profile")
}
out, err = exec.Command("lvchange", "--metadataprofile", "storage-thinpool", "storage/thinpool").CombinedOutput()
return errors.Wrap(err, string(out))
}

View file

@ -12,44 +12,41 @@ import (
"os/exec"
"path"
"path/filepath"
"reflect"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/sirupsen/logrus"
"github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/devicemapper"
"github.com/containers/storage/pkg/dmesg"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/ioutils"
"github.com/containers/storage/pkg/loopback"
"github.com/containers/storage/pkg/mount"
"github.com/containers/storage/pkg/parsers"
"github.com/docker/go-units"
"github.com/containers/storage/pkg/parsers/kernel"
units "github.com/docker/go-units"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
var (
defaultDataLoopbackSize int64 = 100 * 1024 * 1024 * 1024
defaultMetaDataLoopbackSize int64 = 2 * 1024 * 1024 * 1024
defaultBaseFsSize uint64 = 10 * 1024 * 1024 * 1024
defaultThinpBlockSize uint32 = 128 // 64K = 128 512b sectors
defaultUdevSyncOverride = false
maxDeviceID = 0xffffff // 24 bit, pool limit
deviceIDMapSz = (maxDeviceID + 1) / 8
// We retry device removal so many a times that even error messages
// will fill up console during normal operation. So only log Fatal
// messages by default.
logLevel = devicemapper.LogLevelFatal
defaultDataLoopbackSize int64 = 100 * 1024 * 1024 * 1024
defaultMetaDataLoopbackSize int64 = 2 * 1024 * 1024 * 1024
defaultBaseFsSize uint64 = 10 * 1024 * 1024 * 1024
defaultThinpBlockSize uint32 = 128 // 64K = 128 512b sectors
defaultUdevSyncOverride = false
maxDeviceID = 0xffffff // 24 bit, pool limit
deviceIDMapSz = (maxDeviceID + 1) / 8
driverDeferredRemovalSupport = false
enableDeferredRemoval = false
enableDeferredDeletion = false
userBaseSize = false
defaultMinFreeSpacePercent uint32 = 10
lvmSetupConfigForce bool
)
const deviceSetMetaFile string = "deviceset-metadata"
@ -122,6 +119,8 @@ type DeviceSet struct {
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
minFreeSpacePercent uint32 //min free space percentage in thinpool
xfsNospaceRetries string // max retries when xfs receives ENOSPC
lvmSetupConfig directLVMConfig
}
// DiskUsage contains information about disk usage and is used when reporting Status of a device.
@ -170,7 +169,7 @@ type Status struct {
MinFreeSpace uint64
}
// Structure used to export image/container metadata in docker inspect.
// Structure used to export image/container metadata in inspect.
type deviceMetadata struct {
deviceID int
deviceSize uint64 // size in bytes
@ -379,10 +378,7 @@ func (devices *DeviceSet) isDeviceIDFree(deviceID int) bool {
var mask byte
i := deviceID % 8
mask = (1 << uint(i))
if (devices.deviceIDMap[deviceID/8] & mask) != 0 {
return false
}
return true
return (devices.deviceIDMap[deviceID/8] & mask) == 0
}
// Should be called with devices.Lock() held.
@ -409,8 +405,8 @@ func (devices *DeviceSet) lookupDeviceWithLock(hash string) (*devInfo, error) {
// This function relies on that device hash map has been loaded in advance.
// Should be called with devices.Lock() held.
func (devices *DeviceSet) constructDeviceIDMap() {
logrus.Debugf("devmapper: constructDeviceIDMap()")
defer logrus.Debugf("devmapper: constructDeviceIDMap() END")
logrus.Debug("devmapper: constructDeviceIDMap()")
defer logrus.Debug("devmapper: constructDeviceIDMap() END")
for _, info := range devices.Devices {
devices.markDeviceIDUsed(info.DeviceID)
@ -458,8 +454,8 @@ func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo)
}
func (devices *DeviceSet) loadDeviceFilesOnStart() error {
logrus.Debugf("devmapper: loadDeviceFilesOnStart()")
defer logrus.Debugf("devmapper: loadDeviceFilesOnStart() END")
logrus.Debug("devmapper: loadDeviceFilesOnStart()")
defer logrus.Debug("devmapper: loadDeviceFilesOnStart() END")
var scan = func(path string, info os.FileInfo, err error) error {
if err != nil {
@ -479,11 +475,10 @@ func (devices *DeviceSet) loadDeviceFilesOnStart() error {
}
// Should be called with devices.Lock() held.
func (devices *DeviceSet) unregisterDevice(id int, hash string) error {
logrus.Debugf("devmapper: unregisterDevice(%v, %v)", id, hash)
func (devices *DeviceSet) unregisterDevice(hash string) error {
logrus.Debugf("devmapper: unregisterDevice(%v)", hash)
info := &devInfo{
Hash: hash,
DeviceID: id,
Hash: hash,
}
delete(devices.Devices, hash)
@ -528,7 +523,7 @@ func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bo
// Make sure deferred removal on device is canceled, if one was
// scheduled.
if err := devices.cancelDeferredRemoval(info); err != nil {
if err := devices.cancelDeferredRemovalIfNeeded(info); err != nil {
return fmt.Errorf("devmapper: Device Deferred Removal Cancellation Failed: %s", err)
}
@ -539,11 +534,11 @@ func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bo
return devicemapper.ActivateDevice(devices.getPoolDevName(), info.Name(), info.DeviceID, info.Size)
}
// Return true only if kernel supports xfs and mkfs.xfs is available
func xfsSupported() bool {
// xfsSupported checks if xfs is supported, returns nil if it is, otherwise an error
func xfsSupported() error {
// Make sure mkfs.xfs is available
if _, err := exec.LookPath("mkfs.xfs"); err != nil {
return false
return err // error text is descriptive enough
}
// Check if kernel supports xfs filesystem or not.
@ -551,43 +546,48 @@ func xfsSupported() bool {
f, err := os.Open("/proc/filesystems")
if err != nil {
logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err)
return false
return errors.Wrapf(err, "error checking for xfs support")
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if strings.HasSuffix(s.Text(), "\txfs") {
return true
return nil
}
}
if err := s.Err(); err != nil {
logrus.Warnf("devmapper: Could not check if xfs is supported: %v", err)
return errors.Wrapf(err, "error checking for xfs support")
}
return false
return errors.New(`kernel does not support xfs, or "modprobe xfs" failed`)
}
func determineDefaultFS() string {
if xfsSupported() {
err := xfsSupported()
if err == nil {
return "xfs"
}
logrus.Warn("devmapper: XFS is not supported in your system. Either the kernel doesn't support it or mkfs.xfs is not in your PATH. Defaulting to ext4 filesystem")
logrus.Warnf("devmapper: XFS is not supported in your system (%v). Defaulting to ext4 filesystem", err)
return "ext4"
}
// mkfsOptions tries to figure out whether some additional mkfs options are required
func mkfsOptions(fs string) []string {
if fs == "xfs" && !kernel.CheckKernelVersion(3, 16, 0) {
// For kernels earlier than 3.16 (and newer xfsutils),
// some xfs features need to be explicitly disabled.
return []string{"-m", "crc=0,finobt=0"}
}
return []string{}
}
func (devices *DeviceSet) createFilesystem(info *devInfo) (err error) {
devname := info.DevName()
args := []string{}
for _, arg := range devices.mkfsArgs {
args = append(args, arg)
}
args = append(args, devname)
if devices.filesystem == "" {
devices.filesystem = determineDefaultFS()
}
@ -595,7 +595,11 @@ func (devices *DeviceSet) createFilesystem(info *devInfo) (err error) {
return err
}
logrus.Infof("devmapper: Creating filesystem %s on device %s", devices.filesystem, info.Name())
args := mkfsOptions(devices.filesystem)
args = append(args, devices.mkfsArgs...)
args = append(args, devname)
logrus.Infof("devmapper: Creating filesystem %s on device %s, mkfs args: %v", devices.filesystem, info.Name(), args)
defer func() {
if err != nil {
logrus.Infof("devmapper: Error while creating filesystem %s on device %s: %v", devices.filesystem, info.Name(), err)
@ -833,7 +837,7 @@ func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) {
}
if err := devices.closeTransaction(); err != nil {
devices.unregisterDevice(deviceID, hash)
devices.unregisterDevice(hash)
devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID)
devices.markDeviceIDFree(deviceID)
return nil, err
@ -841,11 +845,57 @@ func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) {
return info, nil
}
func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo, size uint64) error {
if err := devices.poolHasFreeSpace(); err != nil {
func (devices *DeviceSet) takeSnapshot(hash string, baseInfo *devInfo, size uint64) error {
var (
devinfo *devicemapper.Info
err error
)
if err = devices.poolHasFreeSpace(); err != nil {
return err
}
if devices.deferredRemove {
devinfo, err = devicemapper.GetInfoWithDeferred(baseInfo.Name())
if err != nil {
return err
}
if devinfo != nil && devinfo.DeferredRemove != 0 {
err = devices.cancelDeferredRemoval(baseInfo)
if err != nil {
// If Error is ErrEnxio. Device is probably already gone. Continue.
if errors.Cause(err) != devicemapper.ErrEnxio {
return err
}
devinfo = nil
} else {
defer devices.deactivateDevice(baseInfo)
}
}
} else {
devinfo, err = devicemapper.GetInfo(baseInfo.Name())
if err != nil {
return err
}
}
doSuspend := devinfo != nil && devinfo.Exists != 0
if doSuspend {
if err = devicemapper.SuspendDevice(baseInfo.Name()); err != nil {
return err
}
defer devicemapper.ResumeDevice(baseInfo.Name())
}
if err = devices.createRegisterSnapDevice(hash, baseInfo, size); err != nil {
return err
}
return nil
}
func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo, size uint64) error {
deviceID, err := devices.getNextFreeDeviceID()
if err != nil {
return err
@ -858,7 +908,7 @@ func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInf
}
for {
if err := devicemapper.CreateSnapDevice(devices.getPoolDevName(), deviceID, baseInfo.Name(), baseInfo.DeviceID); err != nil {
if err := devicemapper.CreateSnapDeviceRaw(devices.getPoolDevName(), deviceID, baseInfo.DeviceID); err != nil {
if devicemapper.DeviceIDExists(err) {
// Device ID already exists. This should not
// happen. Now we have a mechanism to find
@ -888,7 +938,7 @@ func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInf
}
if err := devices.closeTransaction(); err != nil {
devices.unregisterDevice(deviceID, hash)
devices.unregisterDevice(hash)
devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID)
devices.markDeviceIDFree(deviceID)
return err
@ -1134,7 +1184,7 @@ func (devices *DeviceSet) growFS(info *devInfo) error {
defer devices.deactivateDevice(info)
fsMountPoint := "/run/containers/mnt"
fsMountPoint := "/run/containers/storage/mnt"
if _, err := os.Stat(fsMountPoint); os.IsNotExist(err) {
if err := os.MkdirAll(fsMountPoint, 0700); err != nil {
return err
@ -1150,10 +1200,10 @@ func (devices *DeviceSet) growFS(info *devInfo) error {
options = joinMountOptions(options, devices.mountOptions)
if err := mount.Mount(info.DevName(), fsMountPoint, devices.BaseDeviceFilesystem, options); err != nil {
return fmt.Errorf("Error mounting '%s' on '%s': %s", info.DevName(), fsMountPoint, err)
return fmt.Errorf("Error mounting '%s' on '%s': %s\n%v", info.DevName(), fsMountPoint, err, string(dmesg.Dmesg(256)))
}
defer syscall.Unmount(fsMountPoint, syscall.MNT_DETACH)
defer unix.Unmount(fsMountPoint, unix.MNT_DETACH)
switch devices.BaseDeviceFilesystem {
case "ext4":
@ -1216,39 +1266,18 @@ func (devices *DeviceSet) setupBaseImage() error {
}
func setCloseOnExec(name string) {
if fileInfos, _ := ioutil.ReadDir("/proc/self/fd"); fileInfos != nil {
for _, i := range fileInfos {
link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name()))
if link == name {
fd, err := strconv.Atoi(i.Name())
if err == nil {
syscall.CloseOnExec(fd)
}
fileInfos, _ := ioutil.ReadDir("/proc/self/fd")
for _, i := range fileInfos {
link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name()))
if link == name {
fd, err := strconv.Atoi(i.Name())
if err == nil {
unix.CloseOnExec(fd)
}
}
}
}
// DMLog implements logging using DevMapperLogger interface.
func (devices *DeviceSet) DMLog(level int, file string, line int, dmError int, message string) {
// By default libdm sends us all the messages including debug ones.
// We need to filter out messages here and figure out which one
// should be printed.
if level > logLevel {
return
}
// FIXME(vbatts) push this back into ./pkg/devicemapper/
if level <= devicemapper.LogLevelErr {
logrus.Errorf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
} else if level <= devicemapper.LogLevelInfo {
logrus.Infof("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
} else {
// FIXME(vbatts) push this back into ./pkg/devicemapper/
logrus.Debugf("libdevmapper(%d): %s:%d (%d) %s", level, file, line, dmError, message)
}
}
func major(device uint64) uint64 {
return (device >> 8) & 0xfff
}
@ -1356,10 +1385,7 @@ func (devices *DeviceSet) saveTransactionMetaData() error {
}
func (devices *DeviceSet) removeTransactionMetaData() error {
if err := os.RemoveAll(devices.transactionMetaFile()); err != nil {
return err
}
return nil
return os.RemoveAll(devices.transactionMetaFile())
}
func (devices *DeviceSet) rollbackTransaction() error {
@ -1464,12 +1490,9 @@ func (devices *DeviceSet) closeTransaction() error {
}
func determineDriverCapabilities(version string) error {
/*
* Driver version 4.27.0 and greater support deferred activation
* feature.
*/
// Kernel driver version >= 4.27.0 support deferred removal
logrus.Debugf("devicemapper: driver version is %s", version)
logrus.Debugf("devicemapper: kernel dm driver version is %s", version)
versionSplit := strings.Split(version, ".")
major, err := strconv.Atoi(versionSplit[0])
@ -1505,12 +1528,13 @@ func determineDriverCapabilities(version string) error {
// Determine the major and minor number of loopback device
func getDeviceMajorMinor(file *os.File) (uint64, uint64, error) {
stat, err := file.Stat()
var stat unix.Stat_t
err := unix.Stat(file.Name(), &stat)
if err != nil {
return 0, 0, err
}
dev := stat.Sys().(*syscall.Stat_t).Rdev
dev := stat.Rdev
majorNum := major(dev)
minorNum := minor(dev)
@ -1648,36 +1672,19 @@ func (devices *DeviceSet) enableDeferredRemovalDeletion() error {
return nil
}
func (devices *DeviceSet) initDevmapper(doInit bool) error {
// give ourselves to libdm as a log handler
devicemapper.LogInit(devices)
version, err := devicemapper.GetDriverVersion()
if err != nil {
// Can't even get driver version, assume not supported
return errors.Wrap(graphdriver.ErrNotSupported, "unable to determine version of device mapper")
}
if err := determineDriverCapabilities(version); err != nil {
return errors.Wrap(graphdriver.ErrNotSupported, "unable to determine device mapper driver capabilities")
}
func (devices *DeviceSet) initDevmapper(doInit bool) (retErr error) {
if err := devices.enableDeferredRemovalDeletion(); err != nil {
return err
}
// https://github.com/docker/docker/issues/4036
// if supported := devicemapper.UdevSetSyncSupport(true); !supported {
// if storageversion.IAmStatic == "true" {
// logrus.Errorf("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a dynamic binary to use devicemapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option")
// } else {
// logrus.Errorf("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a more recent version of libdevmapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/daemon/#daemon-storage-driver-option")
// }
//
// if !devices.overrideUdevSyncCheck {
// return graphdriver.ErrNotSupported
// }
// }
if supported := devicemapper.UdevSetSyncSupport(true); !supported {
logrus.Error("devmapper: Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a more recent version of libdevmapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/dockerd/#storage-driver-options")
if !devices.overrideUdevSyncCheck {
return graphdriver.ErrNotSupported
}
}
//create the root dir of the devmapper driver ownership to match this
//daemon's remapped root uid/gid so containers can start properly
@ -1692,20 +1699,47 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
return err
}
// Set the device prefix from the device id and inode of the container root dir
st, err := os.Stat(devices.root)
prevSetupConfig, err := readLVMConfig(devices.root)
if err != nil {
return err
}
if !reflect.DeepEqual(devices.lvmSetupConfig, directLVMConfig{}) {
if devices.thinPoolDevice != "" {
return errors.New("cannot setup direct-lvm when `dm.thinpooldev` is also specified")
}
if !reflect.DeepEqual(prevSetupConfig, devices.lvmSetupConfig) {
if !reflect.DeepEqual(prevSetupConfig, directLVMConfig{}) {
return errors.New("changing direct-lvm config is not supported")
}
logrus.WithField("storage-driver", "devicemapper").WithField("direct-lvm-config", devices.lvmSetupConfig).Debugf("Setting up direct lvm mode")
if err := verifyBlockDevice(devices.lvmSetupConfig.Device, lvmSetupConfigForce); err != nil {
return err
}
if err := setupDirectLVM(devices.lvmSetupConfig); err != nil {
return err
}
if err := writeLVMConfig(devices.root, devices.lvmSetupConfig); err != nil {
return err
}
}
devices.thinPoolDevice = "storage-thinpool"
logrus.WithField("storage-driver", "devicemapper").Debugf("Setting dm.thinpooldev to %q", devices.thinPoolDevice)
}
// Set the device prefix from the device id and inode of the storage root dir
var st unix.Stat_t
if err := unix.Stat(devices.root, &st); err != nil {
return fmt.Errorf("devmapper: Error looking up dir %s: %s", devices.root, err)
}
sysSt := st.Sys().(*syscall.Stat_t)
// "reg-" stands for "regular file".
// In the future we might use "dev-" for "device file", etc.
// container-maj,min[-inode] stands for:
// - Managed by container storage
// - The target of this device is at major <maj> and minor <min>
// - If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself.
devices.devicePrefix = fmt.Sprintf("container-%d:%d-%d", major(sysSt.Dev), minor(sysSt.Dev), sysSt.Ino)
devices.devicePrefix = fmt.Sprintf("container-%d:%d-%d", major(st.Dev), minor(st.Dev), st.Ino)
logrus.Debugf("devmapper: Generated prefix: %s", devices.devicePrefix)
// Check for the existence of the thin-pool device
@ -1748,7 +1782,7 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
hasData := devices.hasImage("data")
if !doInit && !hasData {
return errors.New("Loopback data file not found")
return errors.New("loopback data file not found")
}
if !hasData {
@ -1781,7 +1815,7 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
hasMetadata := devices.hasImage("metadata")
if !doInit && !hasMetadata {
return errors.New("Loopback metadata file not found")
return errors.New("loopback metadata file not found")
}
if !hasMetadata {
@ -1811,6 +1845,14 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
if err := devicemapper.CreatePool(devices.getPoolName(), dataFile, metadataFile, devices.thinpBlockSize); err != nil {
return err
}
defer func() {
if retErr != nil {
err = devices.deactivatePool()
if err != nil {
logrus.Warnf("devmapper: Failed to deactivatePool: %v", err)
}
}
}()
}
// Pool already exists and caller did not pass us a pool. That means
@ -1857,8 +1899,8 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
// AddDevice adds a device and registers in the hash.
func (devices *DeviceSet) AddDevice(hash, baseHash string, storageOpt map[string]string) error {
logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s)", hash, baseHash)
defer logrus.Debugf("devmapper: AddDevice(hash=%s basehash=%s) END", hash, baseHash)
logrus.Debugf("devmapper: AddDevice START(hash=%s basehash=%s)", hash, baseHash)
defer logrus.Debugf("devmapper: AddDevice END(hash=%s basehash=%s)", hash, baseHash)
// If a deleted device exists, return error.
baseInfo, err := devices.lookupDeviceWithLock(baseHash)
@ -1895,7 +1937,7 @@ func (devices *DeviceSet) AddDevice(hash, baseHash string, storageOpt map[string
return fmt.Errorf("devmapper: Container size cannot be smaller than %s", units.HumanSize(float64(baseInfo.Size)))
}
if err := devices.createRegisterSnapDevice(hash, baseInfo, size); err != nil {
if err := devices.takeSnapshot(hash, baseInfo, size); err != nil {
return err
}
@ -1975,7 +2017,7 @@ func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) erro
}
if err == nil {
if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil {
if err := devices.unregisterDevice(info.Hash); err != nil {
return err
}
// If device was already in deferred delete state that means
@ -1996,8 +2038,8 @@ func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) erro
// Issue discard only if device open count is zero.
func (devices *DeviceSet) issueDiscard(info *devInfo) error {
logrus.Debugf("devmapper: issueDiscard(device: %s). START", info.Hash)
defer logrus.Debugf("devmapper: issueDiscard(device: %s). END", info.Hash)
logrus.Debugf("devmapper: issueDiscard START(device: %s).", info.Hash)
defer logrus.Debugf("devmapper: issueDiscard END(device: %s).", info.Hash)
// This is a workaround for the kernel not discarding block so
// on the thin pool when we remove a thinp device, so we do it
// manually.
@ -2030,7 +2072,16 @@ func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error {
}
// Try to deactivate device in case it is active.
if err := devices.deactivateDevice(info); err != nil {
// If deferred removal is enabled and deferred deletion is disabled
// then make sure device is removed synchronously. There have been
// some cases of device being busy for short duration and we would
// rather busy wait for device removal to take care of these cases.
deferredRemove := devices.deferredRemove
if !devices.deferredDelete {
deferredRemove = false
}
if err := devices.deactivateDeviceMode(info, deferredRemove); err != nil {
logrus.Debugf("devmapper: Error deactivating device: %s", err)
return err
}
@ -2046,8 +2097,8 @@ func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error {
// removal. If one wants to override that and want DeleteDevice() to fail if
// device was busy and could not be deleted, set syncDelete=true.
func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error {
logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) START", hash, syncDelete)
defer logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) END", hash, syncDelete)
logrus.Debugf("devmapper: DeleteDevice START(hash=%v syncDelete=%v)", hash, syncDelete)
defer logrus.Debugf("devmapper: DeleteDevice END(hash=%v syncDelete=%v)", hash, syncDelete)
info, err := devices.lookupDeviceWithLock(hash)
if err != nil {
return err
@ -2063,8 +2114,8 @@ func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error {
}
func (devices *DeviceSet) deactivatePool() error {
logrus.Debug("devmapper: deactivatePool()")
defer logrus.Debug("devmapper: deactivatePool END")
logrus.Debug("devmapper: deactivatePool() START")
defer logrus.Debug("devmapper: deactivatePool() END")
devname := devices.getPoolDevName()
devinfo, err := devicemapper.GetInfo(devname)
@ -2087,7 +2138,12 @@ func (devices *DeviceSet) deactivatePool() error {
}
func (devices *DeviceSet) deactivateDevice(info *devInfo) error {
logrus.Debugf("devmapper: deactivateDevice(%s)", info.Hash)
return devices.deactivateDeviceMode(info, devices.deferredRemove)
}
func (devices *DeviceSet) deactivateDeviceMode(info *devInfo, deferredRemove bool) error {
var err error
logrus.Debugf("devmapper: deactivateDevice START(%s)", info.Hash)
defer logrus.Debugf("devmapper: deactivateDevice END(%s)", info.Hash)
devinfo, err := devicemapper.GetInfo(info.Name())
@ -2099,14 +2155,17 @@ func (devices *DeviceSet) deactivateDevice(info *devInfo) error {
return nil
}
if devices.deferredRemove {
if err := devicemapper.RemoveDeviceDeferred(info.Name()); err != nil {
return err
}
if deferredRemove {
err = devicemapper.RemoveDeviceDeferred(info.Name())
} else {
if err := devices.removeDevice(info.Name()); err != nil {
return err
}
err = devices.removeDevice(info.Name())
}
// This function's semantics is such that it does not return an
// error if device does not exist. So if device went away by
// the time we actually tried to remove it, do not return error.
if errors.Cause(err) != devicemapper.ErrEnxio {
return err
}
return nil
}
@ -2137,41 +2196,53 @@ func (devices *DeviceSet) removeDevice(devname string) error {
return err
}
func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error {
func (devices *DeviceSet) cancelDeferredRemovalIfNeeded(info *devInfo) error {
if !devices.deferredRemove {
return nil
}
logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name())
defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name())
logrus.Debugf("devmapper: cancelDeferredRemovalIfNeeded START(%s)", info.Name())
defer logrus.Debugf("devmapper: cancelDeferredRemovalIfNeeded END(%s)", info.Name())
devinfo, err := devicemapper.GetInfoWithDeferred(info.Name())
if err != nil {
return err
}
if devinfo != nil && devinfo.DeferredRemove == 0 {
return nil
}
// Cancel deferred remove
for i := 0; i < 100; i++ {
err = devicemapper.CancelDeferredRemove(info.Name())
if err == nil {
break
}
if errors.Cause(err) == devicemapper.ErrEnxio {
// Device is probably already gone. Return success.
return nil
}
if err := devices.cancelDeferredRemoval(info); err != nil {
// If Error is ErrEnxio. Device is probably already gone. Continue.
if errors.Cause(err) != devicemapper.ErrBusy {
return err
}
}
return nil
}
// If we see EBUSY it may be a transient error,
// sleep a bit a retry a few times.
devices.Unlock()
time.Sleep(100 * time.Millisecond)
devices.Lock()
func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error {
logrus.Debugf("devmapper: cancelDeferredRemoval START(%s)", info.Name())
defer logrus.Debugf("devmapper: cancelDeferredRemoval END(%s)", info.Name())
var err error
// Cancel deferred remove
for i := 0; i < 100; i++ {
err = devicemapper.CancelDeferredRemove(info.Name())
if err != nil {
if errors.Cause(err) != devicemapper.ErrBusy {
// If we see EBUSY it may be a transient error,
// sleep a bit a retry a few times.
devices.Unlock()
time.Sleep(100 * time.Millisecond)
devices.Lock()
continue
}
}
break
}
return err
}
@ -2209,9 +2280,6 @@ func (devices *DeviceSet) Shutdown(home string) error {
if err != nil {
return err
}
if p == path.Join(home, "mnt") {
return nil
}
if !info.IsDir() {
return nil
}
@ -2220,7 +2288,7 @@ func (devices *DeviceSet) Shutdown(home string) error {
// We use MNT_DETACH here in case it is still busy in some running
// container. This means it'll go away from the global scope directly,
// and the device will be released when that container dies.
if err := syscall.Unmount(p, syscall.MNT_DETACH); err != nil {
if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
logrus.Debugf("devmapper: Shutdown unmounting %s, error: %s", p, err)
}
}
@ -2263,6 +2331,34 @@ func (devices *DeviceSet) Shutdown(home string) error {
return nil
}
// Recent XFS changes allow changing behavior of filesystem in case of errors.
// When thin pool gets full and XFS gets ENOSPC error, currently it tries
// IO infinitely and sometimes it can block the container process
// and process can't be killWith 0 value, XFS will not retry upon error
// and instead will shutdown filesystem.
func (devices *DeviceSet) xfsSetNospaceRetries(info *devInfo) error {
dmDevicePath, err := os.Readlink(info.DevName())
if err != nil {
return fmt.Errorf("devmapper: readlink failed for device %v:%v", info.DevName(), err)
}
dmDeviceName := path.Base(dmDevicePath)
filePath := "/sys/fs/xfs/" + dmDeviceName + "/error/metadata/ENOSPC/max_retries"
maxRetriesFile, err := os.OpenFile(filePath, os.O_WRONLY, 0)
if err != nil {
return fmt.Errorf("devmapper: user specified daemon option dm.xfs_nospace_max_retries but it does not seem to be supported on this system :%v", err)
}
defer maxRetriesFile.Close()
// Set max retries to 0
_, err = maxRetriesFile.WriteString(devices.xfsNospaceRetries)
if err != nil {
return fmt.Errorf("devmapper: Failed to write string %v to file %v:%v", devices.xfsNospaceRetries, filePath, err)
}
return nil
}
// MountDevice mounts the device if not already mounted.
func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
info, err := devices.lookupDeviceWithLock(hash)
@ -2300,7 +2396,15 @@ func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
options = joinMountOptions(options, label.FormatMountLabel("", mountLabel))
if err := mount.Mount(info.DevName(), path, fstype, options); err != nil {
return fmt.Errorf("devmapper: Error mounting '%s' on '%s': %s", info.DevName(), path, err)
return fmt.Errorf("devmapper: Error mounting '%s' on '%s': %s\n%v", info.DevName(), path, err, string(dmesg.Dmesg(256)))
}
if fstype == "xfs" && devices.xfsNospaceRetries != "" {
if err := devices.xfsSetNospaceRetries(info); err != nil {
unix.Unmount(path, unix.MNT_DETACH)
devices.deactivateDevice(info)
return err
}
}
return nil
@ -2308,8 +2412,8 @@ func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
// UnmountDevice unmounts the device and removes it from hash.
func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error {
logrus.Debugf("devmapper: UnmountDevice(hash=%s)", hash)
defer logrus.Debugf("devmapper: UnmountDevice(hash=%s) END", hash)
logrus.Debugf("devmapper: UnmountDevice START(hash=%s)", hash)
defer logrus.Debugf("devmapper: UnmountDevice END(hash=%s)", hash)
info, err := devices.lookupDeviceWithLock(hash)
if err != nil {
@ -2323,16 +2427,12 @@ func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error {
defer devices.Unlock()
logrus.Debugf("devmapper: Unmount(%s)", mountPath)
if err := syscall.Unmount(mountPath, syscall.MNT_DETACH); err != nil {
if err := unix.Unmount(mountPath, unix.MNT_DETACH); err != nil {
return err
}
logrus.Debug("devmapper: Unmount done")
if err := devices.deactivateDevice(info); err != nil {
return err
}
return nil
return devices.deactivateDevice(info)
}
// HasDevice returns true if the device metadata exists.
@ -2424,8 +2524,8 @@ func (devices *DeviceSet) MetadataDevicePath() string {
}
func (devices *DeviceSet) getUnderlyingAvailableSpace(loopFile string) (uint64, error) {
buf := new(syscall.Statfs_t)
if err := syscall.Statfs(loopFile, buf); err != nil {
buf := new(unix.Statfs_t)
if err := unix.Statfs(loopFile, buf); err != nil {
logrus.Warnf("devmapper: Couldn't stat loopfile filesystem %v: %v", loopFile, err)
return 0, err
}
@ -2534,22 +2634,25 @@ func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps [
minFreeSpacePercent: defaultMinFreeSpacePercent,
}
// Pick up initialization settings, if any were saved before
defaultsFile := path.Join(root, "defaults")
defaultsBytes, err := ioutil.ReadFile(defaultsFile)
defaults := []string{}
settings := map[string]string{}
if err == nil && len(defaultsBytes) > 0 {
defaults = strings.Split(string(defaultsBytes), "\n")
version, err := devicemapper.GetDriverVersion()
if err != nil {
// Can't even get driver version, assume not supported
return nil, graphdriver.ErrNotSupported
}
if err := determineDriverCapabilities(version); err != nil {
return nil, graphdriver.ErrNotSupported
}
if driverDeferredRemovalSupport && devicemapper.LibraryDeferredRemovalSupport {
// enable deferred stuff by default
enableDeferredDeletion = true
enableDeferredRemoval = true
}
foundBlkDiscard := false
nthOption := 0
for _, option := range append(defaults, options...) {
nthOption = nthOption + 1
if len(option) == 0 {
continue
}
var lvmSetupConfig directLVMConfig
for _, option := range options {
key, val, err := parsers.ParseKeyValueOpt(option)
if err != nil {
return nil, err
@ -2637,15 +2740,78 @@ func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps [
}
devices.minFreeSpacePercent = uint32(minFreeSpacePercent)
default:
if nthOption > len(defaults) {
return nil, fmt.Errorf("devmapper: Unknown option %s", key)
case "dm.xfs_nospace_max_retries":
_, err := strconv.ParseUint(val, 10, 64)
if err != nil {
return nil, err
}
logrus.Errorf("devmapper: Unknown option %s, ignoring", key)
devices.xfsNospaceRetries = val
case "dm.directlvm_device":
lvmSetupConfig.Device = val
case "dm.directlvm_device_force":
lvmSetupConfigForce, err = strconv.ParseBool(val)
if err != nil {
return nil, err
}
case "dm.thinp_percent":
per, err := strconv.ParseUint(strings.TrimSuffix(val, "%"), 10, 32)
if err != nil {
return nil, errors.Wrapf(err, "could not parse `dm.thinp_percent=%s`", val)
}
if per >= 100 {
return nil, errors.New("dm.thinp_percent must be greater than 0 and less than 100")
}
lvmSetupConfig.ThinpPercent = per
case "dm.thinp_metapercent":
per, err := strconv.ParseUint(strings.TrimSuffix(val, "%"), 10, 32)
if err != nil {
return nil, errors.Wrapf(err, "could not parse `dm.thinp_metapercent=%s`", val)
}
if per >= 100 {
return nil, errors.New("dm.thinp_metapercent must be greater than 0 and less than 100")
}
lvmSetupConfig.ThinpMetaPercent = per
case "dm.thinp_autoextend_percent":
per, err := strconv.ParseUint(strings.TrimSuffix(val, "%"), 10, 32)
if err != nil {
return nil, errors.Wrapf(err, "could not parse `dm.thinp_autoextend_percent=%s`", val)
}
if per > 100 {
return nil, errors.New("dm.thinp_autoextend_percent must be greater than 0 and less than 100")
}
lvmSetupConfig.AutoExtendPercent = per
case "dm.thinp_autoextend_threshold":
per, err := strconv.ParseUint(strings.TrimSuffix(val, "%"), 10, 32)
if err != nil {
return nil, errors.Wrapf(err, "could not parse `dm.thinp_autoextend_threshold=%s`", val)
}
if per > 100 {
return nil, errors.New("dm.thinp_autoextend_threshold must be greater than 0 and less than 100")
}
lvmSetupConfig.AutoExtendThreshold = per
case "dm.libdm_log_level":
level, err := strconv.ParseInt(val, 10, 32)
if err != nil {
return nil, errors.Wrapf(err, "could not parse `dm.libdm_log_level=%s`", val)
}
if level < devicemapper.LogLevelFatal || level > devicemapper.LogLevelDebug {
return nil, errors.Errorf("dm.libdm_log_level must be in range [%d,%d]", devicemapper.LogLevelFatal, devicemapper.LogLevelDebug)
}
// Register a new logging callback with the specified level.
devicemapper.LogInit(devicemapper.DefaultLogger{
Level: int(level),
})
default:
return nil, fmt.Errorf("devmapper: Unknown option %s", key)
}
settings[key] = val
}
if err := validateLVMConfig(lvmSetupConfig); err != nil {
return nil, err
}
devices.lvmSetupConfig = lvmSetupConfig
// By default, don't do blk discard hack on raw devices, its rarely useful and is expensive
if !foundBlkDiscard && (devices.dataDevice != "" || devices.thinPoolDevice != "") {
devices.doBlkDiscard = false
@ -2655,15 +2821,5 @@ func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps [
return nil, err
}
// Save these settings along with the other metadata
defaults = []string{}
for key, val := range settings {
defaults = append(defaults, key+"="+val)
}
defaultsBytes = []byte(strings.Join(defaults, "\n") + "\n")
if err := ioutils.AtomicWriteFile(defaultsFile, defaultsBytes, 0600); err != nil {
return nil, err
}
return devices, nil
}

View file

@ -14,8 +14,10 @@ import (
"github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/devicemapper"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/locker"
"github.com/containers/storage/pkg/mount"
"github.com/docker/go-units"
"github.com/containers/storage/pkg/system"
units "github.com/docker/go-units"
)
func init() {
@ -29,6 +31,7 @@ type Driver struct {
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
ctr *graphdriver.RefCounter
locker *locker.Locker
}
// Init creates a driver with the given home and the set of options.
@ -48,6 +51,7 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
uidMaps: uidMaps,
gidMaps: gidMaps,
ctr: graphdriver.NewRefCounter(graphdriver.NewDefaultChecker()),
locker: locker.New(),
}
return graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps), nil
@ -65,18 +69,18 @@ func (d *Driver) Status() [][2]string {
status := [][2]string{
{"Pool Name", s.PoolName},
{"Pool Blocksize", fmt.Sprintf("%s", units.HumanSize(float64(s.SectorSize)))},
{"Base Device Size", fmt.Sprintf("%s", units.HumanSize(float64(s.BaseDeviceSize)))},
{"Pool Blocksize", units.HumanSize(float64(s.SectorSize))},
{"Base Device Size", units.HumanSize(float64(s.BaseDeviceSize))},
{"Backing Filesystem", s.BaseDeviceFS},
{"Data file", s.DataFile},
{"Metadata file", s.MetadataFile},
{"Data Space Used", fmt.Sprintf("%s", units.HumanSize(float64(s.Data.Used)))},
{"Data Space Total", fmt.Sprintf("%s", units.HumanSize(float64(s.Data.Total)))},
{"Data Space Available", fmt.Sprintf("%s", units.HumanSize(float64(s.Data.Available)))},
{"Metadata Space Used", fmt.Sprintf("%s", units.HumanSize(float64(s.Metadata.Used)))},
{"Metadata Space Total", fmt.Sprintf("%s", units.HumanSize(float64(s.Metadata.Total)))},
{"Metadata Space Available", fmt.Sprintf("%s", units.HumanSize(float64(s.Metadata.Available)))},
{"Thin Pool Minimum Free Space", fmt.Sprintf("%s", units.HumanSize(float64(s.MinFreeSpace)))},
{"Data Space Used", units.HumanSize(float64(s.Data.Used))},
{"Data Space Total", units.HumanSize(float64(s.Data.Total))},
{"Data Space Available", units.HumanSize(float64(s.Data.Available))},
{"Metadata Space Used", units.HumanSize(float64(s.Metadata.Used))},
{"Metadata Space Total", units.HumanSize(float64(s.Metadata.Total))},
{"Metadata Space Available", units.HumanSize(float64(s.Metadata.Available))},
{"Thin Pool Minimum Free Space", units.HumanSize(float64(s.MinFreeSpace))},
{"Udev Sync Supported", fmt.Sprintf("%v", s.UdevSyncSupported)},
{"Deferred Removal Enabled", fmt.Sprintf("%v", s.DeferredRemoveEnabled)},
{"Deferred Deletion Enabled", fmt.Sprintf("%v", s.DeferredDeleteEnabled)},
@ -122,12 +126,17 @@ func (d *Driver) Cleanup() error {
// CreateReadWrite creates a layer that is writable for use as a container
// file system.
func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
return d.Create(id, parent, mountLabel, storageOpt)
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
return d.Create(id, parent, opts)
}
// Create adds a device with a given id and the parent.
func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) error {
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
var storageOpt map[string]string
if opts != nil {
storageOpt = opts.StorageOpt
}
if err := d.DeviceSet.AddDevice(id, parent, storageOpt); err != nil {
return err
}
@ -137,6 +146,8 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
// Remove removes a device with a given id, unmounts the filesystem.
func (d *Driver) Remove(id string) error {
d.locker.Lock(id)
defer d.locker.Unlock(id)
if !d.DeviceSet.HasDevice(id) {
// Consider removing a non-existing device a no-op
// This is useful to be able to progress on container removal
@ -146,19 +157,15 @@ func (d *Driver) Remove(id string) error {
// This assumes the device has been properly Get/Put:ed and thus is unmounted
if err := d.DeviceSet.DeleteDevice(id, false); err != nil {
return err
return fmt.Errorf("failed to remove device %s: %v", id, err)
}
mp := path.Join(d.home, "mnt", id)
if err := os.RemoveAll(mp); err != nil && !os.IsNotExist(err) {
return err
}
return nil
return system.EnsureRemoveAll(path.Join(d.home, "mnt", id))
}
// Get mounts a device with given id into the root filesystem
func (d *Driver) Get(id, mountLabel string) (string, error) {
d.locker.Lock(id)
defer d.locker.Unlock(id)
mp := path.Join(d.home, "mnt", id)
rootFs := path.Join(mp, "rootfs")
if count := d.ctr.Increment(mp); count > 1 {
@ -209,6 +216,8 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
// Put unmounts a device and removes it.
func (d *Driver) Put(id string) error {
d.locker.Lock(id)
defer d.locker.Unlock(id)
mp := path.Join(d.home, "mnt", id)
if count := d.ctr.Decrement(mp); count > 0 {
return nil
@ -227,6 +236,5 @@ func (d *Driver) Exists(id string) bool {
// AdditionalImageStores returns additional image stores supported by the driver
func (d *Driver) AdditionalImageStores() []string {
var imageStores []string
return imageStores
return nil
}

View file

@ -7,7 +7,8 @@ import (
"fmt"
"os"
"path/filepath"
"syscall"
"golang.org/x/sys/unix"
)
// FIXME: this is copy-pasted from the aufs driver.
@ -15,19 +16,17 @@ import (
// Mounted returns true if a mount point exists.
func Mounted(mountpoint string) (bool, error) {
mntpoint, err := os.Stat(mountpoint)
if err != nil {
var mntpointSt unix.Stat_t
if err := unix.Stat(mountpoint, &mntpointSt); err != nil {
if os.IsNotExist(err) {
return false, nil
}
return false, err
}
parent, err := os.Stat(filepath.Join(mountpoint, ".."))
if err != nil {
var parentSt unix.Stat_t
if err := unix.Stat(filepath.Join(mountpoint, ".."), &parentSt); err != nil {
return false, err
}
mntpointSt := mntpoint.Sys().(*syscall.Stat_t)
parentSt := parent.Sys().(*syscall.Stat_t)
return mntpointSt.Dev != parentSt.Dev, nil
}

View file

@ -29,12 +29,19 @@ var (
// ErrNotSupported returned when driver is not supported.
ErrNotSupported = errors.New("driver not supported")
// ErrPrerequisites retuned when driver does not meet prerequisites.
// ErrPrerequisites returned when driver does not meet prerequisites.
ErrPrerequisites = errors.New("prerequisites for driver not satisfied (wrong filesystem?)")
// ErrIncompatibleFS returned when file system is not supported.
ErrIncompatibleFS = fmt.Errorf("backing file system is unsupported for this graph driver")
)
//CreateOpts contains optional arguments for Create() and CreateReadWrite()
// methods.
type CreateOpts struct {
MountLabel string
StorageOpt map[string]string
}
// InitFunc initializes the storage driver.
type InitFunc func(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error)
@ -48,11 +55,13 @@ type ProtoDriver interface {
// String returns a string representation of this driver.
String() string
// CreateReadWrite creates a new, empty filesystem layer that is ready
// to be used as the storage for a container.
CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error
// to be used as the storage for a container. Additional options can
// be passed in opts. parent may be "" and opts may be nil.
CreateReadWrite(id, parent string, opts *CreateOpts) error
// Create creates a new, empty, filesystem layer with the
// specified id and parent and mountLabel. Parent and mountLabel may be "".
Create(id, parent, mountLabel string, storageOpt map[string]string) error
// specified id and parent and options passed in opts. Parent
// may be "" and opts may be nil.
Create(id, parent string, opts *CreateOpts) error
// Remove attempts to remove the filesystem layer with this id.
Remove(id string) error
// Get returns the mountpoint for the layered filesystem referred
@ -79,9 +88,8 @@ type ProtoDriver interface {
AdditionalImageStores() []string
}
// Driver is the interface for layered/snapshot file system drivers.
type Driver interface {
ProtoDriver
// DiffDriver is the interface to use to implement graph diffs
type DiffDriver interface {
// Diff produces an archive of the changes between the specified
// layer and its parent layer which may be "".
Diff(id, parent string) (io.ReadCloser, error)
@ -99,6 +107,29 @@ type Driver interface {
DiffSize(id, parent string) (size int64, err error)
}
// Driver is the interface for layered/snapshot file system drivers.
type Driver interface {
ProtoDriver
DiffDriver
}
// Capabilities defines a list of capabilities a driver may implement.
// These capabilities are not required; however, they do determine how a
// graphdriver can be used.
type Capabilities struct {
// Flags that this driver is capable of reproducing exactly equivalent
// diffs for read-only layers. If set, clients can rely on the driver
// for consistent tar streams, and avoid extra processing to account
// for potential differences (eg: the layer store's use of tar-split).
ReproducesExactDiffs bool
}
// CapabilityDriver is the interface for layered file system drivers that
// can report on their Capabilities.
type CapabilityDriver interface {
Capabilities() Capabilities
}
// DiffGetterDriver is the interface for layered file system drivers that
// provide a specialized function for getting file contents for tar-split.
type DiffGetterDriver interface {
@ -137,15 +168,13 @@ func Register(name string, initFunc InitFunc) error {
}
// GetDriver initializes and returns the registered driver
func GetDriver(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error) {
func GetDriver(name string, config Options) (Driver, error) {
if initFunc, exists := drivers[name]; exists {
return initFunc(filepath.Join(home, name), options, uidMaps, gidMaps)
return initFunc(filepath.Join(config.Root, name), config.DriverOptions, config.UIDMaps, config.GIDMaps)
}
if pluginDriver, err := lookupPlugin(name, home, options); err == nil {
return pluginDriver, nil
}
logrus.Errorf("Failed to GetDriver graph %s %s", name, home)
return nil, errors.Wrapf(ErrNotSupported, "failed to GetDriver graph %s %s", name, home)
logrus.Errorf("Failed to GetDriver graph %s %s", name, config.Root)
return nil, errors.Wrapf(ErrNotSupported, "failed to GetDriver graph %s %s", name, config.Root)
}
// getBuiltinDriver initializes and returns the registered driver, but does not try to load from plugins
@ -157,15 +186,24 @@ func getBuiltinDriver(name, home string, options []string, uidMaps, gidMaps []id
return nil, errors.Wrapf(ErrNotSupported, "failed to built-in GetDriver graph %s %s", name, home)
}
// Options is used to initialize a graphdriver
type Options struct {
Root string
DriverOptions []string
UIDMaps []idtools.IDMap
GIDMaps []idtools.IDMap
ExperimentalEnabled bool
}
// New creates the driver and initializes it at the specified root.
func New(root string, name string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error) {
func New(name string, config Options) (Driver, error) {
if name != "" {
logrus.Debugf("[graphdriver] trying provided driver %q", name) // so the logs show specified driver
return GetDriver(name, root, options, uidMaps, gidMaps)
return GetDriver(name, config)
}
// Guess for prior driver
driversMap := scanPriorDrivers(root)
driversMap := scanPriorDrivers(config.Root)
for _, name := range priority {
if name == "vfs" {
// don't use vfs even if there is state present.
@ -174,13 +212,13 @@ func New(root string, name string, options []string, uidMaps, gidMaps []idtools.
if _, prior := driversMap[name]; prior {
// of the state found from prior drivers, check in order of our priority
// which we would prefer
driver, err := getBuiltinDriver(name, root, options, uidMaps, gidMaps)
driver, err := getBuiltinDriver(name, config.Root, config.DriverOptions, config.UIDMaps, config.GIDMaps)
if err != nil {
// unlike below, we will return error here, because there is prior
// state, and now it is no longer supported/prereq/compatible, so
// something changed and needs attention. Otherwise the daemon's
// images would just "disappear".
logrus.Errorf("[graphdriver] prior storage driver %q failed: %s", name, err)
logrus.Errorf("[graphdriver] prior storage driver %s failed: %s", name, err)
return nil, err
}
@ -192,17 +230,17 @@ func New(root string, name string, options []string, uidMaps, gidMaps []idtools.
driversSlice = append(driversSlice, name)
}
return nil, fmt.Errorf("%q contains several valid graphdrivers: %s; Please cleanup or explicitly choose storage driver (-s <DRIVER>)", root, strings.Join(driversSlice, ", "))
return nil, fmt.Errorf("%s contains several valid graphdrivers: %s; Please cleanup or explicitly choose storage driver (-s <DRIVER>)", config.Root, strings.Join(driversSlice, ", "))
}
logrus.Infof("[graphdriver] using prior storage driver %q", name)
logrus.Infof("[graphdriver] using prior storage driver: %s", name)
return driver, nil
}
}
// Check for priority drivers first
for _, name := range priority {
driver, err := getBuiltinDriver(name, root, options, uidMaps, gidMaps)
driver, err := getBuiltinDriver(name, config.Root, config.DriverOptions, config.UIDMaps, config.GIDMaps)
if err != nil {
if isDriverNotSupported(err) {
continue
@ -214,7 +252,7 @@ func New(root string, name string, options []string, uidMaps, gidMaps []idtools.
// Check all registered drivers if no priority driver is found
for name, initFunc := range drivers {
driver, err := initFunc(filepath.Join(root, name), options, uidMaps, gidMaps)
driver, err := initFunc(filepath.Join(config.Root, name), config.DriverOptions, config.UIDMaps, config.GIDMaps)
if err != nil {
if isDriverNotSupported(err) {
continue

View file

@ -1,6 +1,10 @@
package graphdriver
import "syscall"
import (
"syscall"
"golang.org/x/sys/unix"
)
var (
// Slice of drivers that should be used in an order
@ -11,7 +15,7 @@ var (
// Mounted checks if the given path is mounted as the fs type
func Mounted(fsType FsMagic, mountPath string) (bool, error) {
var buf syscall.Statfs_t
var buf unix.Statfs_t
if err := syscall.Statfs(mountPath, &buf); err != nil {
return false, err
}

View file

@ -4,9 +4,9 @@ package graphdriver
import (
"path/filepath"
"syscall"
"github.com/containers/storage/pkg/mount"
"golang.org/x/sys/unix"
)
const (
@ -66,13 +66,14 @@ var (
FsMagicAufs: "aufs",
FsMagicBtrfs: "btrfs",
FsMagicCramfs: "cramfs",
FsMagicEcryptfs: "ecryptfs",
FsMagicExtfs: "extfs",
FsMagicF2fs: "f2fs",
FsMagicGPFS: "gpfs",
FsMagicJffs2Fs: "jffs2",
FsMagicJfs: "jfs",
FsMagicNfsFs: "nfs",
FsMagicOverlay: "overlay",
FsMagicOverlay: "overlayfs",
FsMagicRAMFs: "ramfs",
FsMagicReiserFs: "reiserfs",
FsMagicSmbFs: "smb",
@ -87,14 +88,14 @@ var (
// GetFSMagic returns the filesystem id given the path.
func GetFSMagic(rootpath string) (FsMagic, error) {
var buf syscall.Statfs_t
if err := syscall.Statfs(filepath.Dir(rootpath), &buf); err != nil {
var buf unix.Statfs_t
if err := unix.Statfs(filepath.Dir(rootpath), &buf); err != nil {
return 0, err
}
return FsMagic(buf.Type), nil
}
// NewFsChecker returns a checker configured for the provied FsMagic
// NewFsChecker returns a checker configured for the provided FsMagic
func NewFsChecker(t FsMagic) Checker {
return &fsChecker{
t: t,
@ -126,8 +127,8 @@ func (c *defaultChecker) IsMounted(path string) bool {
// Mounted checks if the given path is mounted as the fs type
func Mounted(fsType FsMagic, mountPath string) (bool, error) {
var buf syscall.Statfs_t
if err := syscall.Statfs(mountPath, &buf); err != nil {
var buf unix.Statfs_t
if err := unix.Statfs(mountPath, &buf); err != nil {
return false, err
}
return FsMagic(buf.Type) == fsType, nil

View file

@ -19,8 +19,8 @@ import (
"path/filepath"
"unsafe"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"github.com/containers/storage/pkg/mount"
"github.com/sirupsen/logrus"
)
const (
@ -45,22 +45,52 @@ func GetFSMagic(rootpath string) (FsMagic, error) {
return 0, nil
}
type fsChecker struct {
t FsMagic
}
func (c *fsChecker) IsMounted(path string) bool {
m, _ := Mounted(c.t, path)
return m
}
// NewFsChecker returns a checker configured for the provided FsMagic
func NewFsChecker(t FsMagic) Checker {
return &fsChecker{
t: t,
}
}
// NewDefaultChecker returns a check that parses /proc/mountinfo to check
// if the specified path is mounted.
// No-op on Solaris.
func NewDefaultChecker() Checker {
return &defaultChecker{}
}
type defaultChecker struct {
}
func (c *defaultChecker) IsMounted(path string) bool {
m, _ := mount.Mounted(path)
return m
}
// Mounted checks if the given path is mounted as the fs type
//Solaris supports only ZFS for now
func Mounted(fsType FsMagic, mountPath string) (bool, error) {
cs := C.CString(filepath.Dir(mountPath))
defer C.free(unsafe.Pointer(cs))
buf := C.getstatfs(cs)
defer C.free(unsafe.Pointer(buf))
// on Solaris buf.f_basetype contains ['z', 'f', 's', 0 ... ]
if (buf.f_basetype[0] != 122) || (buf.f_basetype[1] != 102) || (buf.f_basetype[2] != 115) ||
(buf.f_basetype[3] != 0) {
log.Debugf("[zfs] no zfs dataset found for rootdir '%s'", mountPath)
C.free(unsafe.Pointer(buf))
return false, errors.Wrapf(graphdriver.ErrPrerequisites, "no zfs dataset found for rootdir '%s'", mountPath)
logrus.Debugf("[zfs] no zfs dataset found for rootdir '%s'", mountPath)
return false, ErrPrerequisites
}
C.free(unsafe.Pointer(buf))
C.free(unsafe.Pointer(cs))
return true, nil
}

View file

@ -36,25 +36,25 @@ type NaiveDiffDriver struct {
// ApplyDiff(id, parent string, diff io.Reader) (size int64, err error)
// DiffSize(id, parent string) (size int64, err error)
func NewNaiveDiffDriver(driver ProtoDriver, uidMaps, gidMaps []idtools.IDMap) Driver {
gdw := &NaiveDiffDriver{
ProtoDriver: driver,
uidMaps: uidMaps,
gidMaps: gidMaps,
}
return gdw
return &NaiveDiffDriver{ProtoDriver: driver,
uidMaps: uidMaps,
gidMaps: gidMaps}
}
// Diff produces an archive of the changes between the specified
// layer and its parent layer which may be "".
func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch io.ReadCloser, err error) {
layerFs, err := gdw.Get(id, "")
startTime := time.Now()
driver := gdw.ProtoDriver
layerFs, err := driver.Get(id, "")
if err != nil {
return nil, err
}
defer func() {
if err != nil {
gdw.Put(id)
driver.Put(id)
}
}()
@ -65,16 +65,16 @@ func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch io.ReadCloser, err err
}
return ioutils.NewReadCloserWrapper(archive, func() error {
err := archive.Close()
gdw.Put(id)
driver.Put(id)
return err
}), nil
}
parentFs, err := gdw.Get(parent, "")
parentFs, err := driver.Get(parent, "")
if err != nil {
return nil, err
}
defer gdw.Put(parent)
defer driver.Put(parent)
changes, err := archive.ChangesDirs(layerFs, parentFs)
if err != nil {
@ -88,7 +88,13 @@ func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch io.ReadCloser, err err
return ioutils.NewReadCloserWrapper(archive, func() error {
err := archive.Close()
gdw.Put(id)
driver.Put(id)
// NaiveDiffDriver compares file metadata with parent layers. Parent layers
// are extracted from tar's with full second precision on modified time.
// We need this hack here to make sure calls within same second receive
// correct result.
time.Sleep(time.Until(startTime.Truncate(time.Second).Add(time.Second)))
return err
}), nil
}
@ -96,20 +102,22 @@ func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch io.ReadCloser, err err
// Changes produces a list of changes between the specified layer
// and its parent layer. If parent is "", then all changes will be ADD changes.
func (gdw *NaiveDiffDriver) Changes(id, parent string) ([]archive.Change, error) {
layerFs, err := gdw.Get(id, "")
driver := gdw.ProtoDriver
layerFs, err := driver.Get(id, "")
if err != nil {
return nil, err
}
defer gdw.Put(id)
defer driver.Put(id)
parentFs := ""
if parent != "" {
parentFs, err = gdw.Get(parent, "")
parentFs, err = driver.Get(parent, "")
if err != nil {
return nil, err
}
defer gdw.Put(parent)
defer driver.Put(parent)
}
return archive.ChangesDirs(layerFs, parentFs)
@ -119,12 +127,14 @@ func (gdw *NaiveDiffDriver) Changes(id, parent string) ([]archive.Change, error)
// layer with the specified id and parent, returning the size of the
// new layer in bytes.
func (gdw *NaiveDiffDriver) ApplyDiff(id, parent string, diff io.Reader) (size int64, err error) {
driver := gdw.ProtoDriver
// Mount the root filesystem so we can apply the diff/layer.
layerFs, err := gdw.Get(id, "")
layerFs, err := driver.Get(id, "")
if err != nil {
return
}
defer gdw.Put(id)
defer driver.Put(id)
options := &archive.TarOptions{UIDMaps: gdw.uidMaps,
GIDMaps: gdw.gidMaps}
@ -142,16 +152,18 @@ func (gdw *NaiveDiffDriver) ApplyDiff(id, parent string, diff io.Reader) (size i
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
func (gdw *NaiveDiffDriver) DiffSize(id, parent string) (size int64, err error) {
driver := gdw.ProtoDriver
changes, err := gdw.Changes(id, parent)
if err != nil {
return
}
layerFs, err := gdw.Get(id, "")
layerFs, err := driver.Get(id, "")
if err != nil {
return
}
defer gdw.Put(id)
defer driver.Put(id)
return archive.ChangesSize(layerFs, changes), nil
}

View file

@ -0,0 +1,79 @@
// +build linux
package overlay
import (
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
"github.com/containers/storage/pkg/system"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// hasOpaqueCopyUpBug checks whether the filesystem has a bug
// which copies up the opaque flag when copying up an opaque
// directory. When this bug exists naive diff should be used.
func hasOpaqueCopyUpBug(d string) error {
td, err := ioutil.TempDir(d, "opaque-bug-check")
if err != nil {
return err
}
defer func() {
if err := os.RemoveAll(td); err != nil {
logrus.Warnf("Failed to remove check directory %v: %v", td, err)
}
}()
// Make directories l1/d, l2/d, l3, work, merged
if err := os.MkdirAll(filepath.Join(td, "l1", "d"), 0755); err != nil {
return err
}
if err := os.MkdirAll(filepath.Join(td, "l2", "d"), 0755); err != nil {
return err
}
if err := os.Mkdir(filepath.Join(td, "l3"), 0755); err != nil {
return err
}
if err := os.Mkdir(filepath.Join(td, "work"), 0755); err != nil {
return err
}
if err := os.Mkdir(filepath.Join(td, "merged"), 0755); err != nil {
return err
}
// Mark l2/d as opaque
if err := system.Lsetxattr(filepath.Join(td, "l2", "d"), "trusted.overlay.opaque", []byte("y"), 0); err != nil {
return errors.Wrap(err, "failed to set opaque flag on middle layer")
}
opts := fmt.Sprintf("lowerdir=%s:%s,upperdir=%s,workdir=%s", path.Join(td, "l2"), path.Join(td, "l1"), path.Join(td, "l3"), path.Join(td, "work"))
if err := unix.Mount("overlay", filepath.Join(td, "merged"), "overlay", 0, opts); err != nil {
return errors.Wrap(err, "failed to mount overlay")
}
defer func() {
if err := unix.Unmount(filepath.Join(td, "merged"), 0); err != nil {
logrus.Warnf("Failed to unmount check directory %v: %v", filepath.Join(td, "merged"), err)
}
}()
// Touch file in d to force copy up of opaque directory "d" from "l2" to "l3"
if err := ioutil.WriteFile(filepath.Join(td, "merged", "d", "f"), []byte{}, 0644); err != nil {
return errors.Wrap(err, "failed to write to merged directory")
}
// Check l3/d does not have opaque flag
xattrOpaque, err := system.Lgetxattr(filepath.Join(td, "l3", "d"), "trusted.overlay.opaque")
if err != nil {
return errors.Wrap(err, "failed to read opaque flag on upper layer")
}
if string(xattrOpaque) == "y" {
return errors.New("opaque flag erroneously copied up, consider update to kernel 4.8 or later to fix")
}
return nil
}

View file

@ -9,9 +9,9 @@ import (
"fmt"
"os"
"runtime"
"syscall"
"github.com/containers/storage/pkg/reexec"
"golang.org/x/sys/unix"
)
func init() {
@ -31,12 +31,12 @@ type mountOptions struct {
Flag uint32
}
func mountFrom(dir, device, target, mType, label string) error {
func mountFrom(dir, device, target, mType string, flags uintptr, label string) error {
options := &mountOptions{
Device: device,
Target: target,
Type: mType,
Flag: 0,
Flag: uint32(flags),
Label: label,
}
@ -51,16 +51,18 @@ func mountFrom(dir, device, target, mType, label string) error {
cmd.Stderr = output
if err := cmd.Start(); err != nil {
w.Close()
return fmt.Errorf("mountfrom error on re-exec cmd: %v", err)
}
//write the options to the pipe for the untar exec to read
if err := json.NewEncoder(w).Encode(options); err != nil {
w.Close()
return fmt.Errorf("mountfrom json encode to pipe failed: %v", err)
}
w.Close()
if err := cmd.Wait(); err != nil {
return fmt.Errorf("mountfrom re-exec error: %v: output: %s", err, output)
return fmt.Errorf("mountfrom re-exec error: %v: output: %v", err, output)
}
return nil
}
@ -80,7 +82,7 @@ func mountFromMain() {
fatal(err)
}
if err := syscall.Mount(options.Device, options.Target, options.Type, uintptr(options.Flag), options.Label); err != nil {
if err := unix.Mount(options.Device, options.Target, options.Type, uintptr(options.Flag), options.Label); err != nil {
fatal(err)
}

View file

@ -13,21 +13,26 @@ import (
"path/filepath"
"strconv"
"strings"
"syscall"
"github.com/sirupsen/logrus"
"sync"
"github.com/containers/storage/drivers"
"github.com/containers/storage/drivers/overlayutils"
"github.com/containers/storage/drivers/quota"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chrootarchive"
"github.com/containers/storage/pkg/directory"
"github.com/containers/storage/pkg/fsutils"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/locker"
"github.com/containers/storage/pkg/mount"
"github.com/containers/storage/pkg/parsers"
"github.com/containers/storage/pkg/parsers/kernel"
"github.com/containers/storage/pkg/system"
units "github.com/docker/go-units"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
var (
@ -43,7 +48,7 @@ var (
// Each container/image has at least a "diff" directory and "link" file.
// If there is also a "lower" file when there are diff layers
// below as well as "merged" and "work" directories. The "diff" directory
// below as well as "merged" and "work" directories. The "diff" directory
// has the upper layer of the overlay and is used to capture any
// changes to the layer. The "lower" file contains all the lower layer
// mounts separated by ":" and ordered from uppermost to lowermost
@ -77,27 +82,44 @@ const (
idLength = 26
)
type overlayOptions struct {
overrideKernelCheck bool
imageStores []string
quota quota.Quota
}
// Driver contains information about the home directory and the list of active mounts that are created using this driver.
type Driver struct {
name string
home string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
ctr *graphdriver.RefCounter
opts *overlayOptions
name string
home string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
ctr *graphdriver.RefCounter
quotaCtl *quota.Control
options overlayOptions
naiveDiff graphdriver.DiffDriver
supportsDType bool
locker *locker.Locker
}
var backingFs = "<unknown>"
var (
backingFs = "<unknown>"
projectQuotaSupported = false
useNaiveDiffLock sync.Once
useNaiveDiffOnly bool
)
func init() {
graphdriver.Register("overlay", InitAsOverlay)
graphdriver.Register("overlay2", InitAsOverlay2)
graphdriver.Register("overlay", Init)
graphdriver.Register("overlay2", Init)
}
// InitWithName returns the a naive diff driver for the overlay filesystem,
// which returns the passed-in name when asked which driver it is.
func InitWithName(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
opts, err := parseOptions(name, options)
// Init returns the a native diff driver for overlay filesystem.
// If overlay filesystem is not supported on the host, graphdriver.ErrNotSupported is returned as error.
// If an overlay filesystem is not supported over an existing filesystem then error graphdriver.ErrIncompatibleFS is returned.
func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
opts, err := parseOptions(options)
if err != nil {
return nil, err
}
@ -115,7 +137,7 @@ func InitWithName(name, home string, options []string, uidMaps, gidMaps []idtool
if !opts.overrideKernelCheck {
return nil, errors.Wrap(graphdriver.ErrNotSupported, "kernel too old to provide multiple lowers feature for overlay")
}
logrus.Warnf("Using pre-4.0.0 kernel for overlay, mount failures may require kernel update")
logrus.Warn("Using pre-4.0.0 kernel for overlay, mount failures may require kernel update")
}
fsMagic, err := graphdriver.GetFSMagic(home)
@ -128,9 +150,19 @@ func InitWithName(name, home string, options []string, uidMaps, gidMaps []idtool
// check if they are running over btrfs, aufs, zfs, overlay, or ecryptfs
switch fsMagic {
case graphdriver.FsMagicBtrfs, graphdriver.FsMagicAufs, graphdriver.FsMagicZfs, graphdriver.FsMagicOverlay, graphdriver.FsMagicEcryptfs:
case graphdriver.FsMagicAufs, graphdriver.FsMagicZfs, graphdriver.FsMagicOverlay, graphdriver.FsMagicEcryptfs:
logrus.Errorf("'overlay' is not supported over %s", backingFs)
return nil, errors.Wrapf(graphdriver.ErrIncompatibleFS, "'overlay' is not supported over %s", backingFs)
case graphdriver.FsMagicBtrfs:
// Support for OverlayFS on BTRFS was added in kernel 4.7
// See https://btrfs.wiki.kernel.org/index.php/Changelog
if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 7, Minor: 0}) < 0 {
if !opts.overrideKernelCheck {
logrus.Errorf("'overlay' requires kernel 4.7 to use on %s", backingFs)
return nil, errors.Wrapf(graphdriver.ErrIncompatibleFS, "'overlay' requires kernel 4.7 to use on %s", backingFs)
}
logrus.Warn("Using pre-4.7.0 kernel for overlay on btrfs, may require kernel update")
}
}
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
@ -146,38 +178,47 @@ func InitWithName(name, home string, options []string, uidMaps, gidMaps []idtool
return nil, err
}
d := &Driver{
name: name,
home: home,
uidMaps: uidMaps,
gidMaps: gidMaps,
ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
opts: opts,
supportsDType, err := fsutils.SupportsDType(home)
if err != nil {
return nil, err
}
if !supportsDType {
logrus.Warn(overlayutils.ErrDTypeNotSupported("overlay", backingFs))
// TODO: Will make fatal when CRI-O Has AMI built on RHEL7.4
// return nil, overlayutils.ErrDTypeNotSupported("overlay", backingFs)
}
d := &Driver{
name: "overlay",
home: home,
uidMaps: uidMaps,
gidMaps: gidMaps,
ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
supportsDType: supportsDType,
locker: locker.New(),
options: *opts,
}
d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps)
if backingFs == "xfs" {
// Try to enable project quota support over xfs.
if d.quotaCtl, err = quota.NewControl(home); err == nil {
projectQuotaSupported = true
} else if opts.quota.Size > 0 {
return nil, fmt.Errorf("Storage option overlay.size not supported. Filesystem does not support Project Quota: %v", err)
}
} else if opts.quota.Size > 0 {
// if xfs is not the backing fs then error out if the storage-opt overlay.size is used.
return nil, fmt.Errorf("Storage Option overlay.size only supported for backingFS XFS. Found %v", backingFs)
}
logrus.Debugf("backingFs=%s, projectQuotaSupported=%v", backingFs, projectQuotaSupported)
return d, nil
}
// InitAsOverlay returns the a naive diff driver for overlay filesystem.
// If overlay filesystem is not supported on the host, graphdriver.ErrNotSupported is returned as error.
// If a overlay filesystem is not supported over a existing filesystem then error graphdriver.ErrIncompatibleFS is returned.
func InitAsOverlay(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
return InitWithName("overlay", home, options, uidMaps, gidMaps)
}
// InitAsOverlay2 returns the a naive diff driver for overlay filesystem.
// If overlay filesystem is not supported on the host, graphdriver.ErrNotSupported is returned as error.
// If a overlay filesystem is not supported over a existing filesystem then error graphdriver.ErrIncompatibleFS is returned.
func InitAsOverlay2(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
return InitWithName("overlay2", home, options, uidMaps, gidMaps)
}
type overlayOptions struct {
overrideKernelCheck bool
imageStores []string
}
func parseOptions(name string, options []string) (*overlayOptions, error) {
func parseOptions(options []string) (*overlayOptions, error) {
o := &overlayOptions{}
for _, option := range options {
key, val, err := parsers.ParseKeyValueOpt(option)
@ -187,28 +228,37 @@ func parseOptions(name string, options []string) (*overlayOptions, error) {
key = strings.ToLower(key)
switch key {
case "overlay.override_kernel_check", "overlay2.override_kernel_check":
logrus.Debugf("overlay: overide_kernelcheck=%s", val)
o.overrideKernelCheck, err = strconv.ParseBool(val)
if err != nil {
return nil, err
}
case "overlay.size", "overlay2.size":
logrus.Debugf("overlay: size=%s", val)
size, err := units.RAMInBytes(val)
if err != nil {
return nil, err
}
o.quota.Size = uint64(size)
case "overlay.imagestore", "overlay2.imagestore":
logrus.Debugf("overlay: imagestore=%s", val)
// Additional read only image stores to use for lower paths
for _, store := range strings.Split(val, ",") {
store = filepath.Clean(store)
if !filepath.IsAbs(store) {
return nil, fmt.Errorf("%s: image path %q is not absolute. Can not be relative", name, store)
return nil, fmt.Errorf("overlay: image path %q is not absolute. Can not be relative", store)
}
st, err := os.Stat(store)
if err != nil {
return nil, fmt.Errorf("%s: Can't stat imageStore dir %s: %v", name, store, err)
return nil, fmt.Errorf("overlay: can't stat imageStore dir %s: %v", store, err)
}
if !st.IsDir() {
return nil, fmt.Errorf("%s: image path %q must be a directory", name, store)
return nil, fmt.Errorf("overlay: image path %q must be a directory", store)
}
o.imageStores = append(o.imageStores, store)
}
default:
return nil, fmt.Errorf("%s: Unknown option %s", name, key)
return nil, fmt.Errorf("overlay: Unknown option %s", key)
}
}
return o, nil
@ -235,6 +285,16 @@ func supportsOverlay() error {
return errors.Wrap(graphdriver.ErrNotSupported, "'overlay' not found as a supported filesystem on this host. Please ensure kernel is new enough and has overlay support loaded.")
}
func useNaiveDiff(home string) bool {
useNaiveDiffLock.Do(func() {
if err := hasOpaqueCopyUpBug(home); err != nil {
logrus.Warnf("Not using native diff for overlay: %v", err)
useNaiveDiffOnly = true
}
})
return useNaiveDiffOnly
}
func (d *Driver) String() string {
return d.name
}
@ -244,6 +304,8 @@ func (d *Driver) String() string {
func (d *Driver) Status() [][2]string {
return [][2]string{
{"Backing Filesystem", backingFs},
{"Supports d_type", strconv.FormatBool(d.supportsDType)},
{"Native Overlay Diff", strconv.FormatBool(!useNaiveDiff(d.home))},
}
}
@ -281,18 +343,39 @@ func (d *Driver) Cleanup() error {
// CreateReadWrite creates a layer that is writable for use as a container
// file system.
func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
return d.Create(id, parent, mountLabel, storageOpt)
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
if opts != nil && len(opts.StorageOpt) != 0 && !projectQuotaSupported {
return fmt.Errorf("--storage-opt is supported only for overlay over xfs with 'pquota' mount option")
}
if opts == nil {
opts = &graphdriver.CreateOpts{
StorageOpt: map[string]string{},
}
}
if _, ok := opts.StorageOpt["size"]; !ok {
if opts.StorageOpt == nil {
opts.StorageOpt = map[string]string{}
}
opts.StorageOpt["size"] = strconv.FormatUint(d.options.quota.Size, 10)
}
return d.create(id, parent, opts)
}
// Create is used to create the upper, lower, and merge directories required for overlay fs for a given id.
// The parent filesystem is used to configure these directories for the overlay.
func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) (retErr error) {
if len(storageOpt) != 0 {
return fmt.Errorf("--storage-opt is not supported for overlay")
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) (retErr error) {
if opts != nil && len(opts.StorageOpt) != 0 {
if _, ok := opts.StorageOpt["size"]; ok {
return fmt.Errorf("--storage-opt size is only supported for ReadWrite Layers")
}
}
return d.create(id, parent, opts)
}
func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts) (retErr error) {
dir := d.dir(id)
rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
@ -313,6 +396,20 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
}
}()
if opts != nil && len(opts.StorageOpt) > 0 {
driver := &Driver{}
if err := d.parseStorageOpt(opts.StorageOpt, driver); err != nil {
return err
}
if driver.options.quota.Size > 0 {
// Set container disk quota limit
if err := d.quotaCtl.SetQuota(dir, driver.options.quota); err != nil {
return err
}
}
}
if err := idtools.MkdirAs(path.Join(dir, "diff"), 0755, rootUID, rootGID); err != nil {
return err
}
@ -352,6 +449,26 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
return nil
}
// Parse overlay storage options
func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
// Read size to set the disk project quota per container
for key, val := range storageOpt {
key := strings.ToLower(key)
switch key {
case "size":
size, err := units.RAMInBytes(val)
if err != nil {
return err
}
driver.options.quota.Size = uint64(size)
default:
return fmt.Errorf("Unknown option %s", key)
}
}
return nil
}
func (d *Driver) getLower(parent string) (string, error) {
parentDir := d.dir(parent)
@ -378,11 +495,11 @@ func (d *Driver) getLower(parent string) (string, error) {
return strings.Join(lowers, ":"), nil
}
func (d *Driver) dir(val string) string {
newpath := path.Join(d.home, val)
func (d *Driver) dir(id string) string {
newpath := path.Join(d.home, id)
if _, err := os.Stat(newpath); err != nil {
for _, p := range d.AdditionalImageStores() {
l := path.Join(p, d.name, val)
l := path.Join(p, d.name, id)
_, err = os.Stat(l)
if err == nil {
return l
@ -412,6 +529,8 @@ func (d *Driver) getLowerDirs(id string) ([]string, error) {
// Remove cleans the directories that are created for this id.
func (d *Driver) Remove(id string) error {
d.locker.Lock(id)
defer d.locker.Unlock(id)
dir := d.dir(id)
lid, err := ioutil.ReadFile(path.Join(dir, "link"))
if err == nil {
@ -420,14 +539,16 @@ func (d *Driver) Remove(id string) error {
}
}
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
if err := system.EnsureRemoveAll(dir); err != nil && !os.IsNotExist(err) {
return err
}
return nil
}
// Get creates and mounts the required file system for the given id and returns the mount path.
func (d *Driver) Get(id string, mountLabel string) (s string, err error) {
func (d *Driver) Get(id, mountLabel string) (_ string, retErr error) {
d.locker.Lock(id)
defer d.locker.Unlock(id)
dir := d.dir(id)
if _, err := os.Stat(dir); err != nil {
return "", err
@ -459,7 +580,7 @@ func (d *Driver) Get(id string, mountLabel string) (s string, err error) {
return "", fmt.Errorf("Can't stat lower layer %q: %v", newpath, err)
}
} else {
lower = l
lower = newpath
}
if newlowers == "" {
newlowers = lower
@ -473,22 +594,42 @@ func (d *Driver) Get(id string, mountLabel string) (s string, err error) {
return mergedDir, nil
}
defer func() {
if err != nil {
if retErr != nil {
if c := d.ctr.Decrement(mergedDir); c <= 0 {
syscall.Unmount(mergedDir, 0)
if mntErr := unix.Unmount(mergedDir, 0); mntErr != nil {
logrus.Errorf("error unmounting %v: %v", mergedDir, mntErr)
}
}
}
}()
workDir := path.Join(dir, "work")
opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", newlowers, path.Join(id, "diff"), path.Join(id, "work"))
mountLabel = label.FormatMountLabel(opts, mountLabel)
if len(mountLabel) > syscall.Getpagesize() {
return "", fmt.Errorf("cannot mount layer, mount label too large %d", len(mountLabel))
}
opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", newlowers, diffDir, workDir)
mountData := label.FormatMountLabel(opts, mountLabel)
mount := unix.Mount
mountTarget := mergedDir
if err := mountFrom(d.home, "overlay", path.Join(id, "merged"), "overlay", mountLabel); err != nil {
return "", fmt.Errorf("error creating overlay mount to %s: %v", mergedDir, err)
pageSize := unix.Getpagesize()
// Use relative paths and mountFrom when the mount data has exceeded
// the page size. The mount syscall fails if the mount data cannot
// fit within a page and relative links make the mount data much
// smaller at the expense of requiring a fork exec to chroot.
if len(mountData) > pageSize {
//FIXME: We need to figure out to get this to work with additional stores
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", string(lowers), path.Join(id, "diff"), path.Join(id, "work"))
mountData = label.FormatMountLabel(opts, mountLabel)
if len(mountData) > pageSize {
return "", fmt.Errorf("cannot mount layer, mount label too large %d", len(mountData))
}
mount = func(source string, target string, mType string, flags uintptr, label string) error {
return mountFrom(d.home, source, target, mType, flags, label)
}
mountTarget = path.Join(id, "merged")
}
if err := mount("overlay", mountTarget, "overlay", 0, mountData); err != nil {
return "", fmt.Errorf("error creating overlay mount to %s: %v", mountTarget, err)
}
// chown "workdir/work" to the remapped root UID/GID. Overlay fs inside a
@ -507,19 +648,17 @@ func (d *Driver) Get(id string, mountLabel string) (s string, err error) {
// Put unmounts the mount path created for the give id.
func (d *Driver) Put(id string) error {
d.locker.Lock(id)
defer d.locker.Unlock(id)
mountpoint := path.Join(d.dir(id), "merged")
if count := d.ctr.Decrement(mountpoint); count > 0 {
return nil
}
err := syscall.Unmount(mountpoint, 0)
err := unix.Unmount(mountpoint, unix.MNT_DETACH)
if err != nil {
if _, err := ioutil.ReadFile(path.Join(d.dir(id), lowerFile)); err != nil {
// We didn't have a "lower" directory, so we weren't mounting a "merged" directory anyway
return nil
}
logrus.Debugf("Failed to unmount %s %s: %v", id, d.name, err)
logrus.Debugf("Failed to unmount %s overlay: %s - %v", id, mountpoint, err)
}
return err
return nil
}
// Exists checks to see if the id is already mounted.
@ -528,8 +667,33 @@ func (d *Driver) Exists(id string) bool {
return err == nil
}
// isParent returns if the passed in parent is the direct parent of the passed in layer
func (d *Driver) isParent(id, parent string) bool {
lowers, err := d.getLowerDirs(id)
if err != nil {
return false
}
if parent == "" && len(lowers) > 0 {
return false
}
parentDir := d.dir(parent)
var ld string
if len(lowers) > 0 {
ld = filepath.Dir(lowers[0])
}
if ld == "" && parent == "" {
return true
}
return ld == parentDir
}
// ApplyDiff applies the new layer into a root
func (d *Driver) ApplyDiff(id string, parent string, diff io.Reader) (size int64, err error) {
if !d.isParent(id, parent) {
return d.naiveDiff.ApplyDiff(id, parent, diff)
}
applyDir := d.getDiffPath(id)
logrus.Debugf("Applying tar in %s", applyDir)
@ -542,7 +706,7 @@ func (d *Driver) ApplyDiff(id string, parent string, diff io.Reader) (size int64
return 0, err
}
return d.DiffSize(id, parent)
return directory.Size(applyDir)
}
func (d *Driver) getDiffPath(id string) string {
@ -555,12 +719,19 @@ func (d *Driver) getDiffPath(id string) string {
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
if useNaiveDiff(d.home) || !d.isParent(id, parent) {
return d.naiveDiff.DiffSize(id, parent)
}
return directory.Size(d.getDiffPath(id))
}
// Diff produces an archive of the changes between the specified
// layer and its parent layer which may be "".
func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
if useNaiveDiff(d.home) || !d.isParent(id, parent) {
return d.naiveDiff.Diff(id, parent)
}
diffPath := d.getDiffPath(id)
logrus.Debugf("Tar with options on %s", diffPath)
return archive.TarWithOptions(diffPath, &archive.TarOptions{
@ -574,6 +745,9 @@ func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
// Changes produces a list of changes between the specified layer
// and its parent layer. If parent is "", then all changes will be ADD changes.
func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
if useNaiveDiff(d.home) || !d.isParent(id, parent) {
return d.naiveDiff.Changes(id, parent)
}
// Overlay doesn't have snapshots, so we need to get changes from all parent
// layers.
diffPath := d.getDiffPath(id)
@ -587,5 +761,5 @@ func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
// AdditionalImageStores returns additional image stores supported by the driver
func (d *Driver) AdditionalImageStores() []string {
return d.opts.imageStores
return d.options.imageStores
}

View file

@ -12,6 +12,7 @@ import (
"time"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// generateID creates a new random string identifier with the given length
@ -69,7 +70,7 @@ func retryOnError(err error) bool {
case *os.PathError:
return retryOnError(err.Err) // unpack the target error
case syscall.Errno:
if err == syscall.EPERM {
if err == unix.EPERM {
// EPERM represents an entropy pool exhaustion, a condition under
// which we backoff and retry.
return true

View file

@ -0,0 +1,18 @@
// +build linux
package overlayutils
import (
"errors"
"fmt"
)
// ErrDTypeNotSupported denotes that the backing filesystem doesn't support d_type.
func ErrDTypeNotSupported(driver, backingFs string) error {
msg := fmt.Sprintf("%s: the backing %s filesystem is formatted without d_type support, which leads to incorrect behavior.", driver, backingFs)
if backingFs == "xfs" {
msg += " Reformat the filesystem with ftype=1 to enable d_type support."
}
msg += " Running without d_type is not supported."
return errors.New(msg)
}

View file

@ -1,32 +0,0 @@
// +build experimental
package graphdriver
import (
"fmt"
"io"
"github.com/containers/storage/pkg/plugins"
)
type pluginClient interface {
// Call calls the specified method with the specified arguments for the plugin.
Call(string, interface{}, interface{}) error
// Stream calls the specified method with the specified arguments for the plugin and returns the response IO stream
Stream(string, interface{}) (io.ReadCloser, error)
// SendFile calls the specified method, and passes through the IO stream
SendFile(string, io.Reader, interface{}) error
}
func lookupPlugin(name, home string, opts []string) (Driver, error) {
pl, err := plugins.Get(name, "GraphDriver")
if err != nil {
return nil, fmt.Errorf("Error looking up graphdriver plugin %s: %v", name, err)
}
return newPluginDriver(name, home, opts, pl.Client())
}
func newPluginDriver(name, home string, opts []string, c pluginClient) (Driver, error) {
proxy := &graphDriverProxy{name, c}
return proxy, proxy.Init(home, opts)
}

View file

@ -1,7 +0,0 @@
// +build !experimental
package graphdriver
func lookupPlugin(name, home string, opts []string) (Driver, error) {
return nil, ErrNotSupported
}

View file

@ -1,226 +0,0 @@
// +build experimental
package graphdriver
import (
"fmt"
"io"
"github.com/containers/storage/pkg/archive"
"github.com/pkg/errors"
)
type graphDriverProxy struct {
name string
client pluginClient
}
type graphDriverRequest struct {
ID string `json:",omitempty"`
Parent string `json:",omitempty"`
MountLabel string `json:",omitempty"`
}
type graphDriverResponse struct {
Err string `json:",omitempty"`
Dir string `json:",omitempty"`
Exists bool `json:",omitempty"`
Status [][2]string `json:",omitempty"`
Changes []archive.Change `json:",omitempty"`
Size int64 `json:",omitempty"`
Metadata map[string]string `json:",omitempty"`
}
type graphDriverInitRequest struct {
Home string
Opts []string
}
func (d *graphDriverProxy) Init(home string, opts []string) error {
args := &graphDriverInitRequest{
Home: home,
Opts: opts,
}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Init", args, &ret); err != nil {
return err
}
if ret.Err != "" {
return errors.New(ret.Err)
}
return nil
}
func (d *graphDriverProxy) String() string {
return d.name
}
func (d *graphDriverProxy) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
args := &graphDriverRequest{
ID: id,
Parent: parent,
MountLabel: mountLabel,
}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.CreateReadWrite", args, &ret); err != nil {
return err
}
if ret.Err != "" {
return errors.New(ret.Err)
}
return nil
}
func (d *graphDriverProxy) Create(id, parent, mountLabel string, storageOpt map[string]string) error {
args := &graphDriverRequest{
ID: id,
Parent: parent,
MountLabel: mountLabel,
}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Create", args, &ret); err != nil {
return err
}
if ret.Err != "" {
return errors.New(ret.Err)
}
return nil
}
func (d *graphDriverProxy) Remove(id string) error {
args := &graphDriverRequest{ID: id}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Remove", args, &ret); err != nil {
return err
}
if ret.Err != "" {
return errors.New(ret.Err)
}
return nil
}
func (d *graphDriverProxy) Get(id, mountLabel string) (string, error) {
args := &graphDriverRequest{
ID: id,
MountLabel: mountLabel,
}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Get", args, &ret); err != nil {
return "", err
}
var err error
if ret.Err != "" {
err = errors.New(ret.Err)
}
return ret.Dir, err
}
func (d *graphDriverProxy) Put(id string) error {
args := &graphDriverRequest{ID: id}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Put", args, &ret); err != nil {
return err
}
if ret.Err != "" {
return errors.New(ret.Err)
}
return nil
}
func (d *graphDriverProxy) Exists(id string) bool {
args := &graphDriverRequest{ID: id}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Exists", args, &ret); err != nil {
return false
}
return ret.Exists
}
func (d *graphDriverProxy) Status() [][2]string {
args := &graphDriverRequest{}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Status", args, &ret); err != nil {
return nil
}
return ret.Status
}
func (d *graphDriverProxy) Metadata(id string) (map[string]string, error) {
args := &graphDriverRequest{
ID: id,
}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Metadata", args, &ret); err != nil {
return nil, err
}
if ret.Err != "" {
return nil, errors.New(ret.Err)
}
return ret.Metadata, nil
}
func (d *graphDriverProxy) Cleanup() error {
args := &graphDriverRequest{}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Cleanup", args, &ret); err != nil {
return nil
}
if ret.Err != "" {
return errors.New(ret.Err)
}
return nil
}
func (d *graphDriverProxy) Diff(id, parent string) (io.ReadCloser, error) {
args := &graphDriverRequest{
ID: id,
Parent: parent,
}
body, err := d.client.Stream("GraphDriver.Diff", args)
if err != nil {
return nil, err
}
return io.ReadClose(body), nil
}
func (d *graphDriverProxy) Changes(id, parent string) ([]archive.Change, error) {
args := &graphDriverRequest{
ID: id,
Parent: parent,
}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.Changes", args, &ret); err != nil {
return nil, err
}
if ret.Err != "" {
return nil, errors.New(ret.Err)
}
return ret.Changes, nil
}
func (d *graphDriverProxy) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
var ret graphDriverResponse
if err := d.client.SendFile(fmt.Sprintf("GraphDriver.ApplyDiff?id=%s&parent=%s", id, parent), diff, &ret); err != nil {
return -1, err
}
if ret.Err != "" {
return -1, errors.New(ret.Err)
}
return ret.Size, nil
}
func (d *graphDriverProxy) DiffSize(id, parent string) (int64, error) {
args := &graphDriverRequest{
ID: id,
Parent: parent,
}
var ret graphDriverResponse
if err := d.client.Call("GraphDriver.DiffSize", args, &ret); err != nil {
return -1, err
}
if ret.Err != "" {
return -1, errors.New(ret.Err)
}
return ret.Size, nil
}

View file

@ -0,0 +1,337 @@
// +build linux
//
// projectquota.go - implements XFS project quota controls
// for setting quota limits on a newly created directory.
// It currently supports the legacy XFS specific ioctls.
//
// TODO: use generic quota control ioctl FS_IOC_FS{GET,SET}XATTR
// for both xfs/ext4 for kernel version >= v4.5
//
package quota
/*
#include <stdlib.h>
#include <dirent.h>
#include <linux/fs.h>
#include <linux/quota.h>
#include <linux/dqblk_xfs.h>
#ifndef FS_XFLAG_PROJINHERIT
struct fsxattr {
__u32 fsx_xflags;
__u32 fsx_extsize;
__u32 fsx_nextents;
__u32 fsx_projid;
unsigned char fsx_pad[12];
};
#define FS_XFLAG_PROJINHERIT 0x00000200
#endif
#ifndef FS_IOC_FSGETXATTR
#define FS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
#endif
#ifndef FS_IOC_FSSETXATTR
#define FS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr)
#endif
#ifndef PRJQUOTA
#define PRJQUOTA 2
#endif
#ifndef XFS_PROJ_QUOTA
#define XFS_PROJ_QUOTA 2
#endif
#ifndef Q_XSETPQLIM
#define Q_XSETPQLIM QCMD(Q_XSETQLIM, PRJQUOTA)
#endif
#ifndef Q_XGETPQUOTA
#define Q_XGETPQUOTA QCMD(Q_XGETQUOTA, PRJQUOTA)
#endif
*/
import "C"
import (
"fmt"
"io/ioutil"
"path"
"path/filepath"
"unsafe"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// Quota limit params - currently we only control blocks hard limit
type Quota struct {
Size uint64
}
// Control - Context to be used by storage driver (e.g. overlay)
// who wants to apply project quotas to container dirs
type Control struct {
backingFsBlockDev string
nextProjectID uint32
quotas map[string]uint32
}
// NewControl - initialize project quota support.
// Test to make sure that quota can be set on a test dir and find
// the first project id to be used for the next container create.
//
// Returns nil (and error) if project quota is not supported.
//
// First get the project id of the home directory.
// This test will fail if the backing fs is not xfs.
//
// xfs_quota tool can be used to assign a project id to the driver home directory, e.g.:
// echo 999:/var/lib/containers/storage/overlay >> /etc/projects
// echo storage:999 >> /etc/projid
// xfs_quota -x -c 'project -s storage' /<xfs mount point>
//
// In that case, the home directory project id will be used as a "start offset"
// and all containers will be assigned larger project ids (e.g. >= 1000).
// This is a way to prevent xfs_quota management from conflicting with containers/storage.
//
// Then try to create a test directory with the next project id and set a quota
// on it. If that works, continue to scan existing containers to map allocated
// project ids.
//
func NewControl(basePath string) (*Control, error) {
//
// Get project id of parent dir as minimal id to be used by driver
//
minProjectID, err := getProjectID(basePath)
if err != nil {
return nil, err
}
minProjectID++
//
// create backing filesystem device node
//
backingFsBlockDev, err := makeBackingFsDev(basePath)
if err != nil {
return nil, err
}
//
// Test if filesystem supports project quotas by trying to set
// a quota on the first available project id
//
quota := Quota{
Size: 0,
}
if err := setProjectQuota(backingFsBlockDev, minProjectID, quota); err != nil {
return nil, err
}
q := Control{
backingFsBlockDev: backingFsBlockDev,
nextProjectID: minProjectID + 1,
quotas: make(map[string]uint32),
}
//
// get first project id to be used for next container
//
err = q.findNextProjectID(basePath)
if err != nil {
return nil, err
}
logrus.Debugf("NewControl(%s): nextProjectID = %d", basePath, q.nextProjectID)
return &q, nil
}
// SetQuota - assign a unique project id to directory and set the quota limits
// for that project id
func (q *Control) SetQuota(targetPath string, quota Quota) error {
projectID, ok := q.quotas[targetPath]
if !ok {
projectID = q.nextProjectID
//
// assign project id to new container directory
//
err := setProjectID(targetPath, projectID)
if err != nil {
return err
}
q.quotas[targetPath] = projectID
q.nextProjectID++
}
//
// set the quota limit for the container's project id
//
logrus.Debugf("SetQuota(%s, %d): projectID=%d", targetPath, quota.Size, projectID)
return setProjectQuota(q.backingFsBlockDev, projectID, quota)
}
// setProjectQuota - set the quota for project id on xfs block device
func setProjectQuota(backingFsBlockDev string, projectID uint32, quota Quota) error {
var d C.fs_disk_quota_t
d.d_version = C.FS_DQUOT_VERSION
d.d_id = C.__u32(projectID)
d.d_flags = C.XFS_PROJ_QUOTA
d.d_fieldmask = C.FS_DQ_BHARD | C.FS_DQ_BSOFT
d.d_blk_hardlimit = C.__u64(quota.Size / 512)
d.d_blk_softlimit = d.d_blk_hardlimit
var cs = C.CString(backingFsBlockDev)
defer C.free(unsafe.Pointer(cs))
_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XSETPQLIM,
uintptr(unsafe.Pointer(cs)), uintptr(d.d_id),
uintptr(unsafe.Pointer(&d)), 0, 0)
if errno != 0 {
return fmt.Errorf("Failed to set quota limit for projid %d on %s: %v",
projectID, backingFsBlockDev, errno.Error())
}
return nil
}
// GetQuota - get the quota limits of a directory that was configured with SetQuota
func (q *Control) GetQuota(targetPath string, quota *Quota) error {
projectID, ok := q.quotas[targetPath]
if !ok {
return fmt.Errorf("quota not found for path : %s", targetPath)
}
//
// get the quota limit for the container's project id
//
var d C.fs_disk_quota_t
var cs = C.CString(q.backingFsBlockDev)
defer C.free(unsafe.Pointer(cs))
_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XGETPQUOTA,
uintptr(unsafe.Pointer(cs)), uintptr(C.__u32(projectID)),
uintptr(unsafe.Pointer(&d)), 0, 0)
if errno != 0 {
return fmt.Errorf("Failed to get quota limit for projid %d on %s: %v",
projectID, q.backingFsBlockDev, errno.Error())
}
quota.Size = uint64(d.d_blk_hardlimit) * 512
return nil
}
// getProjectID - get the project id of path on xfs
func getProjectID(targetPath string) (uint32, error) {
dir, err := openDir(targetPath)
if err != nil {
return 0, err
}
defer closeDir(dir)
var fsx C.struct_fsxattr
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return 0, fmt.Errorf("Failed to get projid for %s: %v", targetPath, errno.Error())
}
return uint32(fsx.fsx_projid), nil
}
// setProjectID - set the project id of path on xfs
func setProjectID(targetPath string, projectID uint32) error {
dir, err := openDir(targetPath)
if err != nil {
return err
}
defer closeDir(dir)
var fsx C.struct_fsxattr
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return fmt.Errorf("Failed to get projid for %s: %v", targetPath, errno.Error())
}
fsx.fsx_projid = C.__u32(projectID)
fsx.fsx_xflags |= C.FS_XFLAG_PROJINHERIT
_, _, errno = unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSSETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return fmt.Errorf("Failed to set projid for %s: %v", targetPath, errno.Error())
}
return nil
}
// findNextProjectID - find the next project id to be used for containers
// by scanning driver home directory to find used project ids
func (q *Control) findNextProjectID(home string) error {
files, err := ioutil.ReadDir(home)
if err != nil {
return fmt.Errorf("read directory failed : %s", home)
}
for _, file := range files {
if !file.IsDir() {
continue
}
path := filepath.Join(home, file.Name())
projid, err := getProjectID(path)
if err != nil {
return err
}
if projid > 0 {
q.quotas[path] = projid
}
if q.nextProjectID <= projid {
q.nextProjectID = projid + 1
}
}
return nil
}
func free(p *C.char) {
C.free(unsafe.Pointer(p))
}
func openDir(path string) (*C.DIR, error) {
Cpath := C.CString(path)
defer free(Cpath)
dir := C.opendir(Cpath)
if dir == nil {
return nil, fmt.Errorf("Can't open dir")
}
return dir, nil
}
func closeDir(dir *C.DIR) {
if dir != nil {
C.closedir(dir)
}
}
func getDirFd(dir *C.DIR) uintptr {
return uintptr(C.dirfd(dir))
}
// Get the backing block device of the driver home directory
// and create a block device node under the home directory
// to be used by quotactl commands
func makeBackingFsDev(home string) (string, error) {
var stat unix.Stat_t
if err := unix.Stat(home, &stat); err != nil {
return "", err
}
backingFsBlockDev := path.Join(home, "backingFsBlockDev")
// Re-create just in case someone copied the home directory over to a new device
unix.Unlink(backingFsBlockDev)
if err := unix.Mknod(backingFsBlockDev, unix.S_IFBLK|0600, int(stat.Dev)); err != nil {
return "", fmt.Errorf("Failed to mknod %s: %v", backingFsBlockDev, err)
}
return backingFsBlockDev, nil
}

View file

@ -8,7 +8,7 @@ import (
"github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/chrootarchive"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/system"
"github.com/opencontainers/selinux/go-selinux/label"
)
@ -25,15 +25,11 @@ func init() {
// This sets the home directory for the driver and returns NaiveDiffDriver.
func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
d := &Driver{
home: home,
uidMaps: uidMaps,
gidMaps: gidMaps,
home: home,
idMappings: idtools.NewIDMappingsFromMaps(uidMaps, gidMaps),
}
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
if err != nil {
return nil, err
}
if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
rootIDs := d.idMappings.RootPair()
if err := idtools.MkdirAllAndChown(home, 0700, rootIDs); err != nil {
return nil, err
}
return graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps), nil
@ -44,9 +40,8 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
// In order to support layering, files are copied from the parent layer into the new layer. There is no copy-on-write support.
// Driver must be wrapped in NaiveDiffDriver to be used as a graphdriver.Driver
type Driver struct {
home string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
home string
idMappings *idtools.IDMappings
}
func (d *Driver) String() string {
@ -70,29 +65,26 @@ func (d *Driver) Cleanup() error {
// CreateReadWrite creates a layer that is writable for use as a container
// file system.
func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
return d.Create(id, parent, mountLabel, storageOpt)
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
return d.Create(id, parent, opts)
}
// Create prepares the filesystem for the VFS driver and copies the directory for the given id under the parent.
func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) error {
if len(storageOpt) != 0 {
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
if opts != nil && len(opts.StorageOpt) != 0 {
return fmt.Errorf("--storage-opt is not supported for vfs")
}
dir := d.dir(id)
rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
if err != nil {
rootIDs := d.idMappings.RootPair()
if err := idtools.MkdirAllAndChown(filepath.Dir(dir), 0700, rootIDs); err != nil {
return err
}
if err := idtools.MkdirAllAs(filepath.Dir(dir), 0700, rootUID, rootGID); err != nil {
if err := idtools.MkdirAndChown(dir, 0755, rootIDs); err != nil {
return err
}
if err := idtools.MkdirAs(dir, 0755, rootUID, rootGID); err != nil {
return err
}
opts := []string{"level:s0"}
if _, mountLabel, err := label.InitLabels(opts); err == nil {
labelOpts := []string{"level:s0"}
if _, mountLabel, err := label.InitLabels(labelOpts); err == nil {
label.SetFileLabel(dir, mountLabel)
}
if parent == "" {
@ -102,10 +94,7 @@ func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]str
if err != nil {
return fmt.Errorf("%s: %s", parent, err)
}
if err := CopyWithTar(parentDir, dir); err != nil {
return err
}
return nil
return CopyWithTar(parentDir, dir)
}
func (d *Driver) dir(id string) string {
@ -114,10 +103,7 @@ func (d *Driver) dir(id string) string {
// Remove deletes the content from the directory for a given id.
func (d *Driver) Remove(id string) error {
if err := os.RemoveAll(d.dir(id)); err != nil && !os.IsNotExist(err) {
return err
}
return nil
return system.EnsureRemoveAll(d.dir(id))
}
// Get returns the directory for the given id.
@ -146,6 +132,5 @@ func (d *Driver) Exists(id string) bool {
// AdditionalImageStores returns additional image stores supported by the driver
func (d *Driver) AdditionalImageStores() []string {
var imageStores []string
return imageStores
return nil
}

View file

@ -6,6 +6,7 @@ import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
@ -16,6 +17,7 @@ import (
"strings"
"sync"
"syscall"
"time"
"unsafe"
"github.com/Microsoft/go-winio"
@ -29,17 +31,37 @@ import (
"github.com/containers/storage/pkg/longpath"
"github.com/containers/storage/pkg/reexec"
"github.com/containers/storage/pkg/system"
units "github.com/docker/go-units"
"github.com/sirupsen/logrus"
"github.com/vbatts/tar-split/tar/storage"
"golang.org/x/sys/windows"
)
// filterDriver is an HCSShim driver type for the Windows Filter driver.
const filterDriver = 1
var (
// mutatedFiles is a list of files that are mutated by the import process
// and must be backed up and restored.
mutatedFiles = map[string]string{
"UtilityVM/Files/EFI/Microsoft/Boot/BCD": "bcd.bak",
"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG": "bcd.log.bak",
"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG1": "bcd.log1.bak",
"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG2": "bcd.log2.bak",
}
noreexec = false
)
// init registers the windows graph drivers to the register.
func init() {
graphdriver.Register("windowsfilter", InitFilter)
reexec.Register("storage-windows-write-layer", writeLayer)
// DOCKER_WINDOWSFILTER_NOREEXEC allows for inline processing which makes
// debugging issues in the re-exec codepath significantly easier.
if os.Getenv("DOCKER_WINDOWSFILTER_NOREEXEC") != "" {
logrus.Warnf("WindowsGraphDriver is set to not re-exec. This is intended for debugging purposes only.")
noreexec = true
} else {
reexec.Register("docker-windows-write-layer", writeLayerReexec)
}
}
type checker struct {
@ -60,13 +82,22 @@ type Driver struct {
cache map[string]string
}
func isTP5OrOlder() bool {
return system.GetOSVersion().Build <= 14300
}
// InitFilter returns a new Windows storage filter driver.
func InitFilter(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
logrus.Debugf("WindowsGraphDriver InitFilter at %s", home)
fsType, err := getFileSystemType(string(home[0]))
if err != nil {
return nil, err
}
if strings.ToLower(fsType) == "refs" {
return nil, fmt.Errorf("%s is on an ReFS volume - ReFS volumes are not supported", home)
}
if err := idtools.MkdirAllAs(home, 0700, 0, 0); err != nil {
return nil, fmt.Errorf("windowsfilter failed to create '%s': %v", home, err)
}
d := &Driver{
info: hcsshim.DriverInfo{
HomeDir: home,
@ -78,6 +109,37 @@ func InitFilter(home string, options []string, uidMaps, gidMaps []idtools.IDMap)
return d, nil
}
// win32FromHresult is a helper function to get the win32 error code from an HRESULT
func win32FromHresult(hr uintptr) uintptr {
if hr&0x1fff0000 == 0x00070000 {
return hr & 0xffff
}
return hr
}
// getFileSystemType obtains the type of a file system through GetVolumeInformation
// https://msdn.microsoft.com/en-us/library/windows/desktop/aa364993(v=vs.85).aspx
func getFileSystemType(drive string) (fsType string, hr error) {
var (
modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
procGetVolumeInformation = modkernel32.NewProc("GetVolumeInformationW")
buf = make([]uint16, 255)
size = windows.MAX_PATH + 1
)
if len(drive) != 1 {
hr = errors.New("getFileSystemType must be called with a drive letter")
return
}
drive += `:\`
n := uintptr(unsafe.Pointer(nil))
r0, _, _ := syscall.Syscall9(procGetVolumeInformation.Addr(), 8, uintptr(unsafe.Pointer(windows.StringToUTF16Ptr(drive))), n, n, n, n, n, uintptr(unsafe.Pointer(&buf[0])), uintptr(size), 0)
if int32(r0) < 0 {
hr = syscall.Errno(win32FromHresult(r0))
}
fsType = windows.UTF16ToString(buf)
return
}
// String returns the string representation of a driver. This should match
// the name the graph driver has been registered with.
func (d *Driver) String() string {
@ -91,8 +153,19 @@ func (d *Driver) Status() [][2]string {
}
}
// panicIfUsedByLcow does exactly what it says.
// TODO @jhowardmsft - this is a temporary measure for the bring-up of
// Linux containers on Windows. It is a failsafe to ensure that the right
// graphdriver is used.
func panicIfUsedByLcow() {
if system.LCOWSupported() {
panic("inconsistency - windowsfilter graphdriver should not be used when in LCOW mode")
}
}
// Exists returns true if the given id is registered with this driver.
func (d *Driver) Exists(id string) bool {
panicIfUsedByLcow()
rID, err := d.resolveID(id)
if err != nil {
return false
@ -106,20 +179,24 @@ func (d *Driver) Exists(id string) bool {
// CreateReadWrite creates a layer that is writable for use as a container
// file system.
func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
return d.create(id, parent, mountLabel, false, storageOpt)
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
panicIfUsedByLcow()
if opts != nil {
return d.create(id, parent, opts.MountLabel, false, opts.StorageOpt)
}
return d.create(id, parent, "", false, nil)
}
// Create creates a new read-only layer with the given id.
func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) error {
return d.create(id, parent, mountLabel, true, storageOpt)
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
panicIfUsedByLcow()
if opts != nil {
return d.create(id, parent, opts.MountLabel, true, opts.StorageOpt)
}
return d.create(id, parent, "", true, nil)
}
func (d *Driver) create(id, parent, mountLabel string, readOnly bool, storageOpt map[string]string) error {
if len(storageOpt) != 0 {
return fmt.Errorf("--storage-opt is not supported for windows")
}
rPId, err := d.resolveID(parent)
if err != nil {
return err
@ -133,7 +210,7 @@ func (d *Driver) create(id, parent, mountLabel string, readOnly bool, storageOpt
var layerChain []string
if rPId != "" {
parentPath, err := hcsshim.LayerMountPath(d.info, rPId)
parentPath, err := hcsshim.GetLayerMountPath(d.info, rPId)
if err != nil {
return err
}
@ -156,32 +233,20 @@ func (d *Driver) create(id, parent, mountLabel string, readOnly bool, storageOpt
parentPath = layerChain[0]
}
if isTP5OrOlder() {
// Pre-create the layer directory, providing an ACL to give the Hyper-V Virtual Machines
// group access. This is necessary to ensure that Hyper-V containers can access the
// virtual machine data. This is not necessary post-TP5.
path, err := syscall.UTF16FromString(filepath.Join(d.info.HomeDir, id))
if err != nil {
return err
}
// Give system and administrators full control, and VMs read, write, and execute.
// Mark these ACEs as inherited.
sd, err := winio.SddlToSecurityDescriptor("D:(A;OICI;FA;;;SY)(A;OICI;FA;;;BA)(A;OICI;FRFWFX;;;S-1-5-83-0)")
if err != nil {
return err
}
err = syscall.CreateDirectory(&path[0], &syscall.SecurityAttributes{
Length: uint32(unsafe.Sizeof(syscall.SecurityAttributes{})),
SecurityDescriptor: uintptr(unsafe.Pointer(&sd[0])),
})
if err != nil {
return err
}
}
if err := hcsshim.CreateSandboxLayer(d.info, id, parentPath, layerChain); err != nil {
return err
}
storageOptions, err := parseStorageOpt(storageOpt)
if err != nil {
return fmt.Errorf("Failed to parse storage options - %s", err)
}
if storageOptions.size != 0 {
if err := hcsshim.ExpandSandboxSize(d.info, id, storageOptions.size); err != nil {
return err
}
}
}
if _, err := os.Lstat(d.dir(parent)); err != nil {
@ -208,16 +273,89 @@ func (d *Driver) dir(id string) string {
// Remove unmounts and removes the dir information.
func (d *Driver) Remove(id string) error {
panicIfUsedByLcow()
rID, err := d.resolveID(id)
if err != nil {
return err
}
os.RemoveAll(filepath.Join(d.info.HomeDir, "sysfile-backups", rID)) // ok to fail
return hcsshim.DestroyLayer(d.info, rID)
// This retry loop is due to a bug in Windows (Internal bug #9432268)
// if GetContainers fails with ErrVmcomputeOperationInvalidState
// it is a transient error. Retry until it succeeds.
var computeSystems []hcsshim.ContainerProperties
retryCount := 0
osv := system.GetOSVersion()
for {
// Get and terminate any template VMs that are currently using the layer.
// Note: It is unfortunate that we end up in the graphdrivers Remove() call
// for both containers and images, but the logic for template VMs is only
// needed for images - specifically we are looking to see if a base layer
// is in use by a template VM as a result of having started a Hyper-V
// container at some point.
//
// We have a retry loop for ErrVmcomputeOperationInvalidState and
// ErrVmcomputeOperationAccessIsDenied as there is a race condition
// in RS1 and RS2 building during enumeration when a silo is going away
// for example under it, in HCS. AccessIsDenied added to fix 30278.
//
// TODO @jhowardmsft - For RS3, we can remove the retries. Also consider
// using platform APIs (if available) to get this more succinctly. Also
// consider enhancing the Remove() interface to have context of why
// the remove is being called - that could improve efficiency by not
// enumerating compute systems during a remove of a container as it's
// not required.
computeSystems, err = hcsshim.GetContainers(hcsshim.ComputeSystemQuery{})
if err != nil {
if (osv.Build < 15139) &&
((err == hcsshim.ErrVmcomputeOperationInvalidState) || (err == hcsshim.ErrVmcomputeOperationAccessIsDenied)) {
if retryCount >= 500 {
break
}
retryCount++
time.Sleep(10 * time.Millisecond)
continue
}
return err
}
break
}
for _, computeSystem := range computeSystems {
if strings.Contains(computeSystem.RuntimeImagePath, id) && computeSystem.IsRuntimeTemplate {
container, err := hcsshim.OpenContainer(computeSystem.ID)
if err != nil {
return err
}
defer container.Close()
err = container.Terminate()
if hcsshim.IsPending(err) {
err = container.Wait()
} else if hcsshim.IsAlreadyStopped(err) {
err = nil
}
if err != nil {
return err
}
}
}
layerPath := filepath.Join(d.info.HomeDir, rID)
tmpID := fmt.Sprintf("%s-removing", rID)
tmpLayerPath := filepath.Join(d.info.HomeDir, tmpID)
if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) {
return err
}
if err := hcsshim.DestroyLayer(d.info, tmpID); err != nil {
logrus.Errorf("Failed to DestroyLayer %s: %s", id, err)
}
return nil
}
// Get returns the rootfs path for the id. This will mount the dir at it's given path.
// Get returns the rootfs path for the id. This will mount the dir at its given path.
func (d *Driver) Get(id, mountLabel string) (string, error) {
panicIfUsedByLcow()
logrus.Debugf("WindowsGraphDriver Get() id %s mountLabel %s", id, mountLabel)
var dir string
@ -248,9 +386,12 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
return "", err
}
mountPath, err := hcsshim.LayerMountPath(d.info, rID)
mountPath, err := hcsshim.GetLayerMountPath(d.info, rID)
if err != nil {
d.ctr.Decrement(rID)
if err := hcsshim.UnprepareLayer(d.info, rID); err != nil {
logrus.Warnf("Failed to Unprepare %s: %s", id, err)
}
if err2 := hcsshim.DeactivateLayer(d.info, rID); err2 != nil {
logrus.Warnf("Failed to Deactivate %s: %s", id, err)
}
@ -273,6 +414,7 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
// Put adds a new layer to the driver.
func (d *Driver) Put(id string) error {
panicIfUsedByLcow()
logrus.Debugf("WindowsGraphDriver Put() id %s", id)
rID, err := d.resolveID(id)
@ -283,9 +425,15 @@ func (d *Driver) Put(id string) error {
return nil
}
d.cacheMu.Lock()
_, exists := d.cache[rID]
delete(d.cache, rID)
d.cacheMu.Unlock()
// If the cache was not populated, then the layer was left unprepared and deactivated
if !exists {
return nil
}
if err := hcsshim.UnprepareLayer(d.info, rID); err != nil {
return err
}
@ -293,7 +441,31 @@ func (d *Driver) Put(id string) error {
}
// Cleanup ensures the information the driver stores is properly removed.
// We use this opportunity to cleanup any -removing folders which may be
// still left if the daemon was killed while it was removing a layer.
func (d *Driver) Cleanup() error {
items, err := ioutil.ReadDir(d.info.HomeDir)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
// Note we don't return an error below - it's possible the files
// are locked. However, next time around after the daemon exits,
// we likely will be able to to cleanup successfully. Instead we log
// warnings if there are errors.
for _, item := range items {
if item.IsDir() && strings.HasSuffix(item.Name(), "-removing") {
if err := hcsshim.DestroyLayer(d.info, item.Name()); err != nil {
logrus.Warnf("Failed to cleanup %s: %s", item.Name(), err)
} else {
logrus.Infof("Cleaned up %s", item.Name())
}
}
}
return nil
}
@ -301,6 +473,7 @@ func (d *Driver) Cleanup() error {
// layer and its parent layer which may be "".
// The layer should be mounted when calling this function
func (d *Driver) Diff(id, parent string) (_ io.ReadCloser, err error) {
panicIfUsedByLcow()
rID, err := d.resolveID(id)
if err != nil {
return
@ -335,8 +508,9 @@ func (d *Driver) Diff(id, parent string) (_ io.ReadCloser, err error) {
// Changes produces a list of changes between the specified layer
// and its parent layer. If parent is "", then all changes will be ADD changes.
// The layer should be mounted when calling this function
// The layer should not be mounted when calling this function.
func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
panicIfUsedByLcow()
rID, err := d.resolveID(id)
if err != nil {
return nil, err
@ -346,13 +520,12 @@ func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
return nil, err
}
// this is assuming that the layer is unmounted
if err := hcsshim.UnprepareLayer(d.info, rID); err != nil {
if err := hcsshim.ActivateLayer(d.info, rID); err != nil {
return nil, err
}
defer func() {
if err := hcsshim.PrepareLayer(d.info, rID, parentChain); err != nil {
logrus.Warnf("Failed to Deactivate %s: %s", rID, err)
if err2 := hcsshim.DeactivateLayer(d.info, rID); err2 != nil {
logrus.Errorf("changes() failed to DeactivateLayer %s %s: %s", id, rID, err2)
}
}()
@ -392,7 +565,8 @@ func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
// layer with the specified id and parent, returning the size of the
// new layer in bytes.
// The layer should not be mounted when calling this function
func (d *Driver) ApplyDiff(id, parent string, diff archive.Reader) (int64, error) {
func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
panicIfUsedByLcow()
var layerChain []string
if parent != "" {
rPId, err := d.resolveID(parent)
@ -403,7 +577,7 @@ func (d *Driver) ApplyDiff(id, parent string, diff archive.Reader) (int64, error
if err != nil {
return 0, err
}
parentPath, err := hcsshim.LayerMountPath(d.info, rPId)
parentPath, err := hcsshim.GetLayerMountPath(d.info, rPId)
if err != nil {
return 0, err
}
@ -427,6 +601,7 @@ func (d *Driver) ApplyDiff(id, parent string, diff archive.Reader) (int64, error
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
panicIfUsedByLcow()
rPId, err := d.resolveID(parent)
if err != nil {
return
@ -448,6 +623,7 @@ func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
// Metadata returns custom driver information.
func (d *Driver) Metadata(id string) (map[string]string, error) {
panicIfUsedByLcow()
m := make(map[string]string)
m["dir"] = d.dir(id)
return m, nil
@ -505,7 +681,48 @@ func (d *Driver) exportLayer(id string, parentLayerPaths []string) (io.ReadClose
return archive, nil
}
func writeLayerFromTar(r archive.Reader, w hcsshim.LayerWriter) (int64, error) {
// writeBackupStreamFromTarAndSaveMutatedFiles reads data from a tar stream and
// writes it to a backup stream, and also saves any files that will be mutated
// by the import layer process to a backup location.
func writeBackupStreamFromTarAndSaveMutatedFiles(buf *bufio.Writer, w io.Writer, t *tar.Reader, hdr *tar.Header, root string) (nextHdr *tar.Header, err error) {
var bcdBackup *os.File
var bcdBackupWriter *winio.BackupFileWriter
if backupPath, ok := mutatedFiles[hdr.Name]; ok {
bcdBackup, err = os.Create(filepath.Join(root, backupPath))
if err != nil {
return nil, err
}
defer func() {
cerr := bcdBackup.Close()
if err == nil {
err = cerr
}
}()
bcdBackupWriter = winio.NewBackupFileWriter(bcdBackup, false)
defer func() {
cerr := bcdBackupWriter.Close()
if err == nil {
err = cerr
}
}()
buf.Reset(io.MultiWriter(w, bcdBackupWriter))
} else {
buf.Reset(w)
}
defer func() {
ferr := buf.Flush()
if err == nil {
err = ferr
}
}()
return backuptar.WriteBackupStreamFromTarFile(buf, t, hdr)
}
func writeLayerFromTar(r io.Reader, w hcsshim.LayerWriter, root string) (int64, error) {
t := tar.NewReader(r)
hdr, err := t.Next()
totalSize := int64(0)
@ -539,30 +756,7 @@ func writeLayerFromTar(r archive.Reader, w hcsshim.LayerWriter) (int64, error) {
if err != nil {
return 0, err
}
buf.Reset(w)
// Add the Hyper-V Virtual Machine group ACE to the security descriptor
// for TP5 so that Xenons can access all files. This is not necessary
// for post-TP5 builds.
if isTP5OrOlder() {
if sddl, ok := hdr.Winheaders["sd"]; ok {
var ace string
if hdr.Typeflag == tar.TypeDir {
ace = "(A;OICI;0x1200a9;;;S-1-5-83-0)"
} else {
ace = "(A;;0x1200a9;;;S-1-5-83-0)"
}
if hdr.Winheaders["sd"], ok = addAceToSddlDacl(sddl, ace); !ok {
logrus.Debugf("failed to add VM ACE to %s", sddl)
}
}
}
hdr, err = backuptar.WriteBackupStreamFromTarFile(buf, t, hdr)
ferr := buf.Flush()
if ferr != nil {
err = ferr
}
hdr, err = writeBackupStreamFromTarAndSaveMutatedFiles(buf, w, t, hdr, root)
totalSize += size
}
}
@ -572,105 +766,75 @@ func writeLayerFromTar(r archive.Reader, w hcsshim.LayerWriter) (int64, error) {
return totalSize, nil
}
func addAceToSddlDacl(sddl, ace string) (string, bool) {
daclStart := strings.Index(sddl, "D:")
if daclStart < 0 {
return sddl, false
}
dacl := sddl[daclStart:]
daclEnd := strings.Index(dacl, "S:")
if daclEnd < 0 {
daclEnd = len(dacl)
}
dacl = dacl[:daclEnd]
if strings.Contains(dacl, ace) {
return sddl, true
}
i := 2
for i+1 < len(dacl) {
if dacl[i] != '(' {
return sddl, false
}
if dacl[i+1] == 'A' {
break
}
i += 2
for p := 1; i < len(dacl) && p > 0; i++ {
if dacl[i] == '(' {
p++
} else if dacl[i] == ')' {
p--
}
}
}
return sddl[:daclStart+i] + ace + sddl[daclStart+i:], true
}
// importLayer adds a new layer to the tag and graph store based on the given data.
func (d *Driver) importLayer(id string, layerData archive.Reader, parentLayerPaths []string) (size int64, err error) {
cmd := reexec.Command(append([]string{"storage-windows-write-layer", d.info.HomeDir, id}, parentLayerPaths...)...)
output := bytes.NewBuffer(nil)
cmd.Stdin = layerData
cmd.Stdout = output
cmd.Stderr = output
func (d *Driver) importLayer(id string, layerData io.Reader, parentLayerPaths []string) (size int64, err error) {
if !noreexec {
cmd := reexec.Command(append([]string{"docker-windows-write-layer", d.info.HomeDir, id}, parentLayerPaths...)...)
output := bytes.NewBuffer(nil)
cmd.Stdin = layerData
cmd.Stdout = output
cmd.Stderr = output
if err = cmd.Start(); err != nil {
return
if err = cmd.Start(); err != nil {
return
}
if err = cmd.Wait(); err != nil {
return 0, fmt.Errorf("re-exec error: %v: output: %s", err, output)
}
return strconv.ParseInt(output.String(), 10, 64)
}
if err = cmd.Wait(); err != nil {
return 0, fmt.Errorf("re-exec error: %v: output: %s", err, output)
}
return strconv.ParseInt(output.String(), 10, 64)
return writeLayer(layerData, d.info.HomeDir, id, parentLayerPaths...)
}
// writeLayer is the re-exec entry point for writing a layer from a tar file
func writeLayer() {
home := os.Args[1]
id := os.Args[2]
parentLayerPaths := os.Args[3:]
err := func() error {
err := winio.EnableProcessPrivileges([]string{winio.SeBackupPrivilege, winio.SeRestorePrivilege})
if err != nil {
return err
}
info := hcsshim.DriverInfo{
Flavour: filterDriver,
HomeDir: home,
}
w, err := hcsshim.NewLayerWriter(info, id, parentLayerPaths)
if err != nil {
return err
}
size, err := writeLayerFromTar(os.Stdin, w)
if err != nil {
return err
}
err = w.Close()
if err != nil {
return err
}
fmt.Fprint(os.Stdout, size)
return nil
}()
// writeLayerReexec is the re-exec entry point for writing a layer from a tar file
func writeLayerReexec() {
size, err := writeLayer(os.Stdin, os.Args[1], os.Args[2], os.Args[3:]...)
if err != nil {
fmt.Fprint(os.Stderr, err)
os.Exit(1)
}
fmt.Fprint(os.Stdout, size)
}
// writeLayer writes a layer from a tar file.
func writeLayer(layerData io.Reader, home string, id string, parentLayerPaths ...string) (int64, error) {
err := winio.EnableProcessPrivileges([]string{winio.SeBackupPrivilege, winio.SeRestorePrivilege})
if err != nil {
return 0, err
}
if noreexec {
defer func() {
if err := winio.DisableProcessPrivileges([]string{winio.SeBackupPrivilege, winio.SeRestorePrivilege}); err != nil {
// This should never happen, but just in case when in debugging mode.
// See https://github.com/docker/docker/pull/28002#discussion_r86259241 for rationale.
panic("Failed to disabled process privileges while in non re-exec mode")
}
}()
}
info := hcsshim.DriverInfo{
Flavour: filterDriver,
HomeDir: home,
}
w, err := hcsshim.NewLayerWriter(info, id, parentLayerPaths)
if err != nil {
return 0, err
}
size, err := writeLayerFromTar(layerData, w, filepath.Join(home, id))
if err != nil {
return 0, err
}
err = w.Close()
if err != nil {
return 0, err
}
return size, nil
}
// resolveID computes the layerID information based on the given id.
@ -686,11 +850,7 @@ func (d *Driver) resolveID(id string) (string, error) {
// setID stores the layerId in disk.
func (d *Driver) setID(id, altID string) error {
err := ioutil.WriteFile(filepath.Join(d.dir(id), "layerId"), []byte(altID), 0600)
if err != nil {
return err
}
return nil
return ioutil.WriteFile(filepath.Join(d.dir(id), "layerId"), []byte(altID), 0600)
}
// getLayerChain returns the layer chain information.
@ -733,17 +893,23 @@ type fileGetCloserWithBackupPrivileges struct {
}
func (fg *fileGetCloserWithBackupPrivileges) Get(filename string) (io.ReadCloser, error) {
if backupPath, ok := mutatedFiles[filename]; ok {
return os.Open(filepath.Join(fg.path, backupPath))
}
var f *os.File
// Open the file while holding the Windows backup privilege. This ensures that the
// file can be opened even if the caller does not actually have access to it according
// to the security descriptor.
// to the security descriptor. Also use sequential file access to avoid depleting the
// standby list - Microsoft VSO Bug Tracker #9900466
err := winio.RunWithPrivilege(winio.SeBackupPrivilege, func() error {
path := longpath.AddPrefix(filepath.Join(fg.path, filename))
p, err := syscall.UTF16FromString(path)
p, err := windows.UTF16FromString(path)
if err != nil {
return err
}
h, err := syscall.CreateFile(&p[0], syscall.GENERIC_READ, syscall.FILE_SHARE_READ, nil, syscall.OPEN_EXISTING, syscall.FILE_FLAG_BACKUP_SEMANTICS, 0)
const fileFlagSequentialScan = 0x08000000 // FILE_FLAG_SEQUENTIAL_SCAN
h, err := windows.CreateFile(&p[0], windows.GENERIC_READ, windows.FILE_SHARE_READ, nil, windows.OPEN_EXISTING, windows.FILE_FLAG_BACKUP_SEMANTICS|fileFlagSequentialScan, 0)
if err != nil {
return &os.PathError{Op: "open", Path: path, Err: err}
}
@ -757,19 +923,10 @@ func (fg *fileGetCloserWithBackupPrivileges) Close() error {
return nil
}
type fileGetDestroyCloser struct {
storage.FileGetter
path string
}
func (f *fileGetDestroyCloser) Close() error {
// TODO: activate layers and release here?
return os.RemoveAll(f.path)
}
// DiffGetter returns a FileGetCloser that can read files from the directory that
// contains files for the layer differences. Used for direct access for tar-split.
func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
panicIfUsedByLcow()
id, err := d.resolveID(id)
if err != nil {
return nil, err
@ -777,3 +934,32 @@ func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
return &fileGetCloserWithBackupPrivileges{d.dir(id)}, nil
}
// AdditionalImageStores returns additional image stores supported by the driver
func (d *Driver) AdditionalImageStores() []string {
return nil
}
type storageOptions struct {
size uint64
}
func parseStorageOpt(storageOpt map[string]string) (*storageOptions, error) {
options := storageOptions{}
// Read size to change the block device size per container.
for key, val := range storageOpt {
key := strings.ToLower(key)
switch key {
case "size":
size, err := units.RAMInBytes(val)
if err != nil {
return nil, err
}
options.size = uint64(size)
default:
return nil, fmt.Errorf("Unknown storage option: %s", key)
}
}
return &options, nil
}

View file

@ -10,7 +10,6 @@ import (
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/containers/storage/drivers"
@ -21,6 +20,7 @@ import (
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
type zfsOptions struct {
@ -100,6 +100,14 @@ func Init(base string, opt []string, uidMaps, gidMaps []idtools.IDMap) (graphdri
return nil, fmt.Errorf("BUG: zfs get all -t filesystem -rHp '%s' should contain '%s'", options.fsName, options.fsName)
}
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
if err != nil {
return nil, fmt.Errorf("Failed to get root uid/guid: %v", err)
}
if err := idtools.MkdirAllAs(base, 0700, rootUID, rootGID); err != nil {
return nil, fmt.Errorf("Failed to create '%s': %v", base, err)
}
if err := mount.MakePrivate(base); err != nil {
return nil, err
}
@ -134,8 +142,8 @@ func parseOptions(opt []string) (zfsOptions, error) {
}
func lookupZfsDataset(rootdir string) (string, error) {
var stat syscall.Stat_t
if err := syscall.Stat(rootdir, &stat); err != nil {
var stat unix.Stat_t
if err := unix.Stat(rootdir, &stat); err != nil {
return "", fmt.Errorf("Failed to access '%s': %s", rootdir, err)
}
wantedDev := stat.Dev
@ -145,7 +153,7 @@ func lookupZfsDataset(rootdir string) (string, error) {
return "", err
}
for _, m := range mounts {
if err := syscall.Stat(m.Mountpoint, &stat); err != nil {
if err := unix.Stat(m.Mountpoint, &stat); err != nil {
logrus.Debugf("[zfs] failed to stat '%s' while scanning for zfs mount: %v", m.Mountpoint, err)
continue // may fail on fuse file systems
}
@ -213,7 +221,10 @@ func (d *Driver) Status() [][2]string {
// Metadata returns image/container metadata related to graph driver
func (d *Driver) Metadata(id string) (map[string]string, error) {
return nil, nil
return map[string]string{
"Mountpoint": d.mountPath(id),
"Dataset": d.zfsPath(id),
}, nil
}
func (d *Driver) cloneFilesystem(name, parentName string) error {
@ -248,12 +259,17 @@ func (d *Driver) mountPath(id string) string {
// CreateReadWrite creates a layer that is writable for use as a container
// file system.
func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
return d.Create(id, parent, mountLabel, storageOpt)
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
return d.Create(id, parent, opts)
}
// Create prepares the dataset and filesystem for the ZFS driver for the given id under the parent.
func (d *Driver) Create(id string, parent string, mountLabel string, storageOpt map[string]string) error {
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
var storageOpt map[string]string
if opts != nil {
storageOpt = opts.StorageOpt
}
err := d.create(id, parent, storageOpt)
if err == nil {
return nil
@ -391,22 +407,20 @@ func (d *Driver) Put(id string) error {
logrus.Debugf(`[zfs] unmount("%s")`, mountpoint)
err = mount.Unmount(mountpoint)
if err != nil {
if err := mount.Unmount(mountpoint); err != nil {
return fmt.Errorf("error unmounting to %s: %v", mountpoint, err)
}
return err
return nil
}
// Exists checks to see if the cache entry exists for the given id.
func (d *Driver) Exists(id string) bool {
d.Lock()
defer d.Unlock()
return d.filesystemsCache[d.zfsPath(id)] == true
return d.filesystemsCache[d.zfsPath(id)]
}
// AdditionalImageStores returns additional image stores supported by the driver
func (d *Driver) AdditionalImageStores() []string {
var imageStores []string
return imageStores
return nil
}

View file

@ -3,16 +3,16 @@ package zfs
import (
"fmt"
"strings"
"syscall"
"github.com/containers/storage/drivers"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func checkRootdirFs(rootdir string) error {
var buf syscall.Statfs_t
if err := syscall.Statfs(rootdir, &buf); err != nil {
var buf unix.Statfs_t
if err := unix.Statfs(rootdir, &buf); err != nil {
return fmt.Errorf("Failed to access '%s': %s", rootdir, err)
}

View file

@ -2,16 +2,16 @@ package zfs
import (
"fmt"
"syscall"
"github.com/containers/storage/drivers"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func checkRootdirFs(rootdir string) error {
var buf syscall.Statfs_t
if err := syscall.Statfs(rootdir, &buf); err != nil {
var buf unix.Statfs_t
if err := unix.Statfs(rootdir, &buf); err != nil {
return fmt.Errorf("Failed to access '%s': %s", rootdir, err)
}

View file

@ -22,24 +22,23 @@ import (
"github.com/containers/storage/drivers"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"github.com/sirupsen/logrus"
)
func checkRootdirFs(rootdir string) error {
cs := C.CString(filepath.Dir(rootdir))
defer C.free(unsafe.Pointer(cs))
buf := C.getstatfs(cs)
defer C.free(unsafe.Pointer(buf))
// on Solaris buf.f_basetype contains ['z', 'f', 's', 0 ... ]
if (buf.f_basetype[0] != 122) || (buf.f_basetype[1] != 102) || (buf.f_basetype[2] != 115) ||
(buf.f_basetype[3] != 0) {
log.Debugf("[zfs] no zfs dataset found for rootdir '%s'", rootdir)
C.free(unsafe.Pointer(buf))
logrus.Debugf("[zfs] no zfs dataset found for rootdir '%s'", rootdir)
return errors.Wrapf(graphdriver.ErrPrerequisites, "no zfs dataset found for rootdir '%s'", rootdir)
}
C.free(unsafe.Pointer(buf))
C.free(unsafe.Pointer(cs))
return nil
}