diff --git a/README.md b/README.md index 4066bcb..fafb835 100644 --- a/README.md +++ b/README.md @@ -33,20 +33,38 @@ $> sudo dmesg | tail -1 ### Container -Running ioctl's inside containers is a little tricky. Assuming we've already `insmod` the module above: ```shell -sudo docker run -it --rm -v $(pwd)/helloctl/helloctl:/usr/bin/helloctl:ro -v /dev/helloctl:/dev/helloctl:ro fedora /usr/bin/helloctl +sudo docker run -it --rm -v $(pwd)/helloctl/helloctl:/usr/bin/helloctl:ro --device /dev/helloctl fedora /usr/bin/helloctl ``` -You'll get a failure `Could not open /dev/helloctl` +Now `dmesg | tail` will reflect that the command ran successfully, but at the cost of running a non-isolated container. + +### runc + +Setting device permissions requires the device's major/minor numbers: insert the module, collect the major/minor of the device, and put them into a runc `config.json`. ```shell -sudo docker run -it --rm -v $(pwd)/helloctl/helloctl:/usr/bin/helloctl:ro -v /dev/helloctl:/dev/helloctl:ro --privileged fedora /usr/bin/helloctl +$> stat -c "%t %T" /dev/helloctl +a 39 +$> echo "$((16#$(stat -c "%t" /dev/helloctl )))" +10 +$> echo "$((16#$(stat -c "%T" /dev/helloctl )))" +57 ``` -Now `dmesg | tail` will reflect the command ran successfully. +Now in the `config.json`, the `linux.resources.devices` array needs the following entry, with the major/minor integers from your `/dev/helloctl`: + +```json + { + "allow": true, + "type": "c", + "major": 10, + "minor": 57, + "access": "rwm" + }, +``` ### cleanup diff --git a/cap_check.stp b/cap_check.stp new file mode 100755 index 0000000..1bbb835 --- /dev/null +++ b/cap_check.stp @@ -0,0 +1,211 @@ +#! /usr/bin/env stap +# Copyright (C) 2016 Red Hat, Inc. +# Written by William Cohen +# +# container_check.stp watches for conditions (use of +# prohibited capabilities, use of prohibited syscalls, and +# syscall failures) that would indicate that this application +# would not operate properly in a restricted container. 
+# +# By default this script monitors all system calls system-wide. +# To limit container_check.stp to monitoring a particular +# process and its children use the systemtap -x option +# or -c option. +# +# By default this script lists all capabilities requested. +# To limit it to a subset of capabilities use the following +# option on the command line with a '-' separated list of +# forbidden capabilities: +# +# -G forbidden_capabilities="badcap1-badcap2" +# +# By default this script allows all syscalls. +# To mark syscalls as forbidden use a '-' separated list: +# +# -G forbidden_syscalls="syscall1-syscall2" +# +# control-c to exit data collection + +global forbidden_capabilities="" # '-' separated list of forbidden capabilities +global forbidden_syscalls="" # '-' separated list of forbidden syscalls + +global capability, cap_use +global badcaps = -1, cap_name +global cap_syscall +global badsyscall +global problem_syscall +global syscall_errno + +# Determine whether task t is target() or a descendant of target() +# returns 1 if it is (or if no target() was specified) +# returns 0 if target() is not found among t and its parents +function child_of_target:long (t:long) +{ + if (!target()) return 1 + while(t && t != task_parent(t)) { + if (task_pid(t) == target()) return 1 + t = task_parent(t) + } + return 0 +} + +function init_cap_name2num() +{ + /* set up the names */ + cap_name[0]="cap_chown" + cap_name[1]="cap_dac_override" + cap_name[2]="cap_dac_read_search" + cap_name[3]="cap_fowner" + cap_name[4]="cap_fsetid" + cap_name[5]="cap_kill" + cap_name[6]="cap_setgid" + cap_name[7]="cap_setuid" + cap_name[8]="cap_setpcap" + cap_name[9]="cap_linux_immutable" + cap_name[10]="cap_net_bind_service" + cap_name[11]="cap_net_broadcast" + cap_name[12]="cap_net_admin" + cap_name[13]="cap_net_raw" + cap_name[14]="cap_ipc_lock" + cap_name[15]="cap_ipc_owner" + cap_name[16]="cap_sys_module" + cap_name[17]="cap_sys_rawio" + cap_name[18]="cap_sys_chroot" + cap_name[19]="cap_sys_ptrace" + cap_name[20]="cap_sys_pacct" + 
cap_name[21]="cap_sys_admin" + cap_name[22]="cap_sys_boot" + cap_name[23]="cap_sys_nice" + cap_name[24]="cap_sys_resource" + cap_name[25]="cap_sys_time" + cap_name[26]="cap_sys_tty_config" + cap_name[27]="cap_mknod" + cap_name[28]="cap_lease" + cap_name[29]="cap_audit_write" + cap_name[30]="cap_audit_control" + cap_name[31]="cap_setfcap" + cap_name[32]="cap_mac_override" + cap_name[33]="cap_mac_admin" + cap_name[34]="cap_syslog" + cap_name[35]="cap_wake_alarm" + cap_name[36]="cap_block_suspend" +} + +function parse_capabilities() { + /* convert optional list of forbidden capabilities into a bitmask */ + caps = 0 + cname = tokenize(forbidden_capabilities, "-") + while (cname != "") { + i =36 + while(i>0) { + if(cname == cap_name[i]) { + caps |= 1<> 1 + i += 1 + } + printf("\n") + } + + printf("\n\ncapabilities used by syscalls\n"); + printf("%16s, %20s ( %16s ) : %16s\n", "executable", "syscall", "capability", "count") + foreach([e+,s,c] in cap_syscall){ + printf("%16s, %20s ( ", e, s); + cap = c + i=0 + while (cap) { + if (cap & 1) + printf("%16s ", cap_name[i] ); + cap = cap >> 1 + i += 1 + } + printf(") : %16d\n", @count(cap_syscall[e,s,c]) ); + } + + printf("\n\nforbidden syscalls\n"); + printf("%16s, %20s: %16s\n", "executable", "syscall", "count") + foreach([e+,s] in problem_syscall){ + printf("%16s, %20s: %16d\n", e, s, @count(problem_syscall[e,s]) ); + } + + printf("\n\nfailed syscalls\n"); + printf("%16s, %20s = %16s: %16s\n", "executable", "syscall", "errno", "count") + foreach([e+,s,v] in syscall_errno){ + printf("%16s, %20s = %16s: %16d\n", e, s, errno_str(v), + @count(syscall_errno[e,s,v]) ); + } +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..e500204 --- /dev/null +++ b/config.json @@ -0,0 +1,250 @@ +{ + "ociVersion": "1.0.0-rc2-dev", + "platform": { + "os": "linux", + "arch": "amd64" + }, + "process": { + "terminal": true, + "consoleSize": { + "height": 0, + "width": 0 + }, + "user": { + "uid": 0, + "gid": 0 + }, + 
"args": [ + "/usr/bin/helloctl" + ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOSTNAME=helloctl", + "TERM=xterm", + "DISTTAG=f26container", + "FGC=f26" + ], + "cwd": "/", + "capabilities": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + "CAP_SYS_PTRACE" + ] + }, + "root": { + "path": "./rootfs/" + }, + "hostname": "helloctl", + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ] + }, + { + "destination": "/sys/fs/cgroup", + "type": "cgroup", + "source": "cgroup", + "options": [ + "ro", + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/dev/helloctl", + "type": "bind", + "source": "/dev/helloctl", + "options": [ + "rbind", + "ro", + "rprivate" + ] + }, + { + "destination": "/usr/bin/helloctl", + "type": "bind", + "source": "./helloctl/helloctl", + "options": [ + "rbind", + "ro", + "rprivate" + ] + } + ], + "hooks": { + "prestart": [ ], + "poststop": [ ] + }, + "linux": { + "resources": { + "devices": [ + { + "allow": false, + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": 1, + "minor": 5, + "access": "rwm" + }, + { + "allow": 
true, + "type": "c", + "major": 1, + "minor": 3, + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": 1, + "minor": 9, + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": 1, + "minor": 8, + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": 5, + "minor": 0, + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": 5, + "minor": 1, + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": 10, + "minor": 57, + "access": "rwm" + }, + { + "allow": false, + "type": "c", + "major": 10, + "minor": 229, + "access": "rwm" + } + ], + "disableOOMKiller": false, + "oomScoreAdj": 0, + "cpu": {}, + "pids": { + "limit": 0 + }, + "blockIO": { + "blkioWeight": 0 + } + }, + "cgroupsPath": "system.slice:docker:8ad3dfde3644481046eace9cd586600f0416d3c43b4b9f4cc161c470859c0e17", + "namespaces": [ + { + "type": "mount" + }, + { + "type": "network" + }, + { + "type": "uts" + }, + { + "type": "pid" + }, + { + "type": "ipc" + } + ], + "maskedPaths": [ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware" + ], + "readonlyPaths": [ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +}