Files
docker-docs/daemon/graphdriver/aufs/aufs.go
Dan Walsh 1716d497a4 Relabel BTRFS Content on container Creation
This change will allow us to run SELinux in a container with
BTRFS back end.  We continue to work on fixing the kernel/BTRFS
but this change will allow SELinux Security separation on BTRFS.

It basically relabels the content on container creation.

Just relabling -init directory in BTRFS use case. Everything looks like it
works. I don't believe tar/achive stores the SELinux labels, so we are good
as far as docker commit.

Tested Speed on startup with BTRFS on top of loopback directory. BTRFS
not on loopback should get even better perfomance on startup time.  The
more inodes inside of the container image will increase the relabel time.

This patch will give people who care more about security the option of
runnin BTRFS with SELinux.  Those who don't want to take the slow down
can disable SELinux either in individual containers or for all containers
by continuing to disable SELinux in the daemon.

Without relabel:

> time docker run --security-opt label:disable fedora echo test
test

real    0m0.918s
user    0m0.009s
sys    0m0.026s

With Relabel

test

real    0m1.942s
user    0m0.007s
sys    0m0.030s

Signed-off-by: Dan Walsh <dwalsh@redhat.com>

Signed-off-by: Dan Walsh <dwalsh@redhat.com>
2015-11-11 14:49:27 -05:00

554 lines
13 KiB
Go

// +build linux
/*
aufs driver directory structure
.
├── layers // Metadata of layers
│ ├── 1
│ ├── 2
│ └── 3
├── diff // Content of the layer
│ ├── 1 // Contains layers that need to be mounted for the id
│ ├── 2
│ └── 3
└── mnt // Mount points for the rw layers to be mounted
├── 1
├── 2
└── 3
*/
package aufs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path"
"strings"
"sync"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/graphdriver"
"github.com/docker/docker/pkg/archive"
"github.com/docker/docker/pkg/chrootarchive"
"github.com/docker/docker/pkg/directory"
"github.com/docker/docker/pkg/idtools"
mountpk "github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/stringid"
"github.com/opencontainers/runc/libcontainer/label"
)
var (
// ErrAufsNotSupported is returned if aufs is not supported by the host.
ErrAufsNotSupported = fmt.Errorf("AUFS was not found in /proc/filesystems")
incompatibleFsMagic = []graphdriver.FsMagic{
graphdriver.FsMagicBtrfs,
graphdriver.FsMagicAufs,
}
backingFs = "<unknown>"
enableDirpermLock sync.Once
enableDirperm bool
)
func init() {
graphdriver.Register("aufs", Init)
}
type data struct {
referenceCount int
path string
}
// Driver contains information about the filesystem mounted.
// root of the filesystem
// sync.Mutex to protect against concurrent modifications
// active maps mount id to the count
type Driver struct {
root string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
sync.Mutex // Protects concurrent modification to active
active map[string]*data
}
// Init returns a new AUFS driver.
// An error is returned if AUFS is not supported.
func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
// Try to load the aufs kernel module
if err := supportsAufs(); err != nil {
return nil, graphdriver.ErrNotSupported
}
fsMagic, err := graphdriver.GetFSMagic(root)
if err != nil {
return nil, err
}
if fsName, ok := graphdriver.FsNames[fsMagic]; ok {
backingFs = fsName
}
for _, magic := range incompatibleFsMagic {
if fsMagic == magic {
return nil, graphdriver.ErrIncompatibleFS
}
}
paths := []string{
"mnt",
"diff",
"layers",
}
a := &Driver{
root: root,
active: make(map[string]*data),
uidMaps: uidMaps,
gidMaps: gidMaps,
}
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
if err != nil {
return nil, err
}
// Create the root aufs driver dir and return
// if it already exists
// If not populate the dir structure
if err := idtools.MkdirAllAs(root, 0755, rootUID, rootGID); err != nil {
if os.IsExist(err) {
return a, nil
}
return nil, err
}
if err := mountpk.MakePrivate(root); err != nil {
return nil, err
}
// Populate the dir structure
for _, p := range paths {
if err := idtools.MkdirAllAs(path.Join(root, p), 0755, rootUID, rootGID); err != nil {
return nil, err
}
}
return a, nil
}
// Return a nil error if the kernel supports aufs
// We cannot modprobe because inside dind modprobe fails
// to run
func supportsAufs() error {
// We can try to modprobe aufs first before looking at
// proc/filesystems for when aufs is supported
exec.Command("modprobe", "aufs").Run()
f, err := os.Open("/proc/filesystems")
if err != nil {
return err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if strings.Contains(s.Text(), "aufs") {
return nil
}
}
return ErrAufsNotSupported
}
func (a *Driver) rootPath() string {
return a.root
}
func (*Driver) String() string {
return "aufs"
}
// Status returns current information about the filesystem such as root directory, number of directories mounted, etc.
func (a *Driver) Status() [][2]string {
ids, _ := loadIds(path.Join(a.rootPath(), "layers"))
return [][2]string{
{"Root Dir", a.rootPath()},
{"Backing Filesystem", backingFs},
{"Dirs", fmt.Sprintf("%d", len(ids))},
{"Dirperm1 Supported", fmt.Sprintf("%v", useDirperm())},
}
}
// GetMetadata not implemented
func (a *Driver) GetMetadata(id string) (map[string]string, error) {
return nil, nil
}
// Exists returns true if the given id is registered with
// this driver
func (a *Driver) Exists(id string) bool {
if _, err := os.Lstat(path.Join(a.rootPath(), "layers", id)); err != nil {
return false
}
return true
}
// Create three folders for each id
// mnt, layers, and diff
func (a *Driver) Create(id, parent, mountLabel string) error {
if err := a.createDirsFor(id); err != nil {
return err
}
// Write the layers metadata
f, err := os.Create(path.Join(a.rootPath(), "layers", id))
if err != nil {
return err
}
defer f.Close()
if parent != "" {
ids, err := getParentIds(a.rootPath(), parent)
if err != nil {
return err
}
if _, err := fmt.Fprintln(f, parent); err != nil {
return err
}
for _, i := range ids {
if _, err := fmt.Fprintln(f, i); err != nil {
return err
}
}
}
a.active[id] = &data{}
return nil
}
func (a *Driver) createDirsFor(id string) error {
paths := []string{
"mnt",
"diff",
}
rootUID, rootGID, err := idtools.GetRootUIDGID(a.uidMaps, a.gidMaps)
if err != nil {
return err
}
for _, p := range paths {
if err := idtools.MkdirAllAs(path.Join(a.rootPath(), p, id), 0755, rootUID, rootGID); err != nil {
return err
}
}
return nil
}
// Remove will unmount and remove the given id.
func (a *Driver) Remove(id string) error {
// Protect the a.active from concurrent access
a.Lock()
defer a.Unlock()
m := a.active[id]
if m != nil {
if m.referenceCount > 0 {
return nil
}
// Make sure the dir is umounted first
if err := a.unmount(m); err != nil {
return err
}
}
tmpDirs := []string{
"mnt",
"diff",
}
// Atomically remove each directory in turn by first moving it out of the
// way (so that docker doesn't find it anymore) before doing removal of
// the whole tree.
for _, p := range tmpDirs {
realPath := path.Join(a.rootPath(), p, id)
tmpPath := path.Join(a.rootPath(), p, fmt.Sprintf("%s-removing", id))
if err := os.Rename(realPath, tmpPath); err != nil && !os.IsNotExist(err) {
return err
}
defer os.RemoveAll(tmpPath)
}
// Remove the layers file for the id
if err := os.Remove(path.Join(a.rootPath(), "layers", id)); err != nil && !os.IsNotExist(err) {
return err
}
return nil
}
// Get returns the rootfs path for the id.
// This will mount the dir at it's given path
func (a *Driver) Get(id, mountLabel string) (string, error) {
ids, err := getParentIds(a.rootPath(), id)
if err != nil {
if !os.IsNotExist(err) {
return "", err
}
ids = []string{}
}
// Protect the a.active from concurrent access
a.Lock()
defer a.Unlock()
m := a.active[id]
if m == nil {
m = &data{}
a.active[id] = m
}
// If a dir does not have a parent ( no layers )do not try to mount
// just return the diff path to the data
m.path = path.Join(a.rootPath(), "diff", id)
if len(ids) > 0 {
m.path = path.Join(a.rootPath(), "mnt", id)
if m.referenceCount == 0 {
if err := a.mount(id, m, mountLabel); err != nil {
return "", err
}
}
}
m.referenceCount++
return m.path, nil
}
// Put unmounts and updates list of active mounts.
func (a *Driver) Put(id string) error {
// Protect the a.active from concurrent access
a.Lock()
defer a.Unlock()
m := a.active[id]
if m == nil {
return nil
}
if count := m.referenceCount; count > 1 {
m.referenceCount = count - 1
} else {
ids, _ := getParentIds(a.rootPath(), id)
// We only mounted if there are any parents
if ids != nil && len(ids) > 0 {
a.unmount(m)
}
delete(a.active, id)
}
return nil
}
// Diff produces an archive of the changes between the specified
// layer and its parent layer which may be "".
func (a *Driver) Diff(id, parent string) (archive.Archive, error) {
// AUFS doesn't need the parent layer to produce a diff.
return archive.TarWithOptions(path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
Compression: archive.Uncompressed,
ExcludePatterns: []string{archive.WhiteoutMetaPrefix + "*", "!" + archive.WhiteoutOpaqueDir},
UIDMaps: a.uidMaps,
GIDMaps: a.gidMaps,
})
}
func (a *Driver) applyDiff(id string, diff archive.Reader) error {
dir := path.Join(a.rootPath(), "diff", id)
if err := chrootarchive.UntarUncompressed(diff, dir, &archive.TarOptions{
UIDMaps: a.uidMaps,
GIDMaps: a.gidMaps,
}); err != nil {
return err
}
// show invalid whiteouts warning.
files, err := ioutil.ReadDir(path.Join(dir, archive.WhiteoutLinkDir))
if err == nil && len(files) > 0 {
logrus.Warnf("Archive contains aufs hardlink references that are not supported.")
}
return nil
}
// DiffSize calculates the changes between the specified id
// and its parent and returns the size in bytes of the changes
// relative to its base filesystem directory.
func (a *Driver) DiffSize(id, parent string) (size int64, err error) {
// AUFS doesn't need the parent layer to calculate the diff size.
return directory.Size(path.Join(a.rootPath(), "diff", id))
}
// ApplyDiff extracts the changeset from the given diff into the
// layer with the specified id and parent, returning the size of the
// new layer in bytes.
func (a *Driver) ApplyDiff(id, parent string, diff archive.Reader) (size int64, err error) {
// AUFS doesn't need the parent id to apply the diff.
if err = a.applyDiff(id, diff); err != nil {
return
}
return a.DiffSize(id, parent)
}
// Changes produces a list of changes between the specified layer
// and its parent layer. If parent is "", then all changes will be ADD changes.
func (a *Driver) Changes(id, parent string) ([]archive.Change, error) {
// AUFS doesn't have snapshots, so we need to get changes from all parent
// layers.
layers, err := a.getParentLayerPaths(id)
if err != nil {
return nil, err
}
return archive.Changes(layers, path.Join(a.rootPath(), "diff", id))
}
func (a *Driver) getParentLayerPaths(id string) ([]string, error) {
parentIds, err := getParentIds(a.rootPath(), id)
if err != nil {
return nil, err
}
layers := make([]string, len(parentIds))
// Get the diff paths for all the parent ids
for i, p := range parentIds {
layers[i] = path.Join(a.rootPath(), "diff", p)
}
return layers, nil
}
func (a *Driver) mount(id string, m *data, mountLabel string) error {
// If the id is mounted or we get an error return
if mounted, err := a.mounted(m); err != nil || mounted {
return err
}
var (
target = m.path
rw = path.Join(a.rootPath(), "diff", id)
)
layers, err := a.getParentLayerPaths(id)
if err != nil {
return err
}
if err := a.aufsMount(layers, rw, target, mountLabel); err != nil {
return fmt.Errorf("error creating aufs mount to %s: %v", target, err)
}
return nil
}
func (a *Driver) unmount(m *data) error {
if mounted, err := a.mounted(m); err != nil || !mounted {
return err
}
return Unmount(m.path)
}
func (a *Driver) mounted(m *data) (bool, error) {
return mountpk.Mounted(m.path)
}
// Cleanup aufs and unmount all mountpoints
func (a *Driver) Cleanup() error {
for id, m := range a.active {
if err := a.unmount(m); err != nil {
logrus.Errorf("Unmounting %s: %s", stringid.TruncateID(id), err)
}
}
return mountpk.Unmount(a.root)
}
func (a *Driver) aufsMount(ro []string, rw, target, mountLabel string) (err error) {
defer func() {
if err != nil {
Unmount(target)
}
}()
// Mount options are clipped to page size(4096 bytes). If there are more
// layers then these are remounted individually using append.
offset := 54
if useDirperm() {
offset += len("dirperm1")
}
b := make([]byte, syscall.Getpagesize()-len(mountLabel)-offset) // room for xino & mountLabel
bp := copy(b, fmt.Sprintf("br:%s=rw", rw))
firstMount := true
i := 0
for {
for ; i < len(ro); i++ {
layer := fmt.Sprintf(":%s=ro+wh", ro[i])
if firstMount {
if bp+len(layer) > len(b) {
break
}
bp += copy(b[bp:], layer)
} else {
data := label.FormatMountLabel(fmt.Sprintf("append%s", layer), mountLabel)
if err = mount("none", target, "aufs", syscall.MS_REMOUNT, data); err != nil {
return
}
}
}
if firstMount {
opts := "dio,noplink,xino=/dev/shm/aufs.xino"
if useDirperm() {
opts += ",dirperm1"
}
data := label.FormatMountLabel(fmt.Sprintf("%s,%s", string(b[:bp]), opts), mountLabel)
if err = mount("none", target, "aufs", 0, data); err != nil {
return
}
firstMount = false
}
if i == len(ro) {
break
}
}
return
}
// useDirperm checks dirperm1 mount option can be used with the current
// version of aufs.
func useDirperm() bool {
enableDirpermLock.Do(func() {
base, err := ioutil.TempDir("", "docker-aufs-base")
if err != nil {
logrus.Errorf("error checking dirperm1: %v", err)
return
}
defer os.RemoveAll(base)
union, err := ioutil.TempDir("", "docker-aufs-union")
if err != nil {
logrus.Errorf("error checking dirperm1: %v", err)
return
}
defer os.RemoveAll(union)
opts := fmt.Sprintf("br:%s,dirperm1,xino=/dev/shm/aufs.xino", base)
if err := mount("none", union, "aufs", 0, opts); err != nil {
return
}
enableDirperm = true
if err := Unmount(union); err != nil {
logrus.Errorf("error checking dirperm1: failed to unmount %v", err)
}
})
return enableDirperm
}