binctr/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go
Jess Frazelle 94d1cfbfbf
update vendor
Signed-off-by: Jess Frazelle <acidburn@microsoft.com>
2018-09-25 12:27:46 -04:00

450 lines
15 KiB
Go

package uvm
import (
"encoding/binary"
"fmt"
"net"
"os"
"path/filepath"
"runtime"
"strings"
"github.com/Microsoft/hcsshim/internal/guid"
"github.com/Microsoft/hcsshim/internal/hcs"
"github.com/Microsoft/hcsshim/internal/mergemaps"
"github.com/Microsoft/hcsshim/internal/schema2"
"github.com/Microsoft/hcsshim/internal/schemaversion"
"github.com/Microsoft/hcsshim/internal/uvmfolder"
"github.com/Microsoft/hcsshim/internal/wclayer"
"github.com/Microsoft/hcsshim/internal/wcow"
"github.com/linuxkit/virtsock/pkg/hvsock"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
type PreferredRootFSType int
const (
PreferredRootFSTypeInitRd = 0
PreferredRootFSTypeVHD = 1
initrdFile = "initrd.img"
vhdFile = "rootfs.vhd"
)
// UVMOptions are the set of options passed to Create() to create a utility vm.
type UVMOptions struct {
ID string // Identifier for the uvm. Defaults to generated GUID.
Owner string // Specifies the owner. Defaults to executable name.
OperatingSystem string // "windows" or "linux".
Resources *specs.WindowsResources // Optional resources for the utility VM. Supports Memory.limit and CPU.Count only currently. // TODO consider extending?
AdditionHCSDocumentJSON string // Optional additional JSON to merge into the HCS document prior
// WCOW specific parameters
LayerFolders []string // Set of folders for base layers and scratch. Ordered from top most read-only through base read-only layer, followed by scratch
// LCOW specific parameters
BootFilesPath string // Folder in which kernel and root file system reside. Defaults to \Program Files\Linux Containers
KernelFile string // Filename under BootFilesPath for the kernel. Defaults to `kernel`
RootFSFile string // Filename under BootFilesPath for the UVMs root file system. Defaults are `initrd.img` or `rootfs.vhd`.
PreferredRootFSType *PreferredRootFSType // Controls searching for the RootFSFile.
KernelBootOptions string // Additional boot options for the kernel
EnableGraphicsConsole bool // If true, enable a graphics console for the utility VM
ConsolePipe string // The named pipe path to use for the serial console. eg \\.\pipe\vmpipe
VPMemDeviceCount *int32 // Number of VPMem devices. Limit at 128. If booting UVM from VHD, device 0 is taken. LCOW Only.
SCSIControllerCount *int // The number of SCSI controllers. Defaults to 1 if omitted. Currently we only support 0 or 1.
}
const linuxLogVsockPort = 109
// Create creates an HCS compute system representing a utility VM.
//
// WCOW Notes:
// - If the scratch folder does not exist, it will be created
// - If the scratch folder does not contain `sandbox.vhdx` it will be created based on the system template located in the layer folders.
// - The scratch is always attached to SCSI 0:0
//
func Create(opts *UVMOptions) (_ *UtilityVM, err error) {
logrus.Debugf("uvm::Create %+v", opts)
if opts == nil {
return nil, fmt.Errorf("no options supplied to create")
}
uvm := &UtilityVM{
id: opts.ID,
owner: opts.Owner,
operatingSystem: opts.OperatingSystem,
}
uvmFolder := "" // Windows
if opts.OperatingSystem != "linux" && opts.OperatingSystem != "windows" {
logrus.Debugf("uvm::Create Unsupported OS")
return nil, fmt.Errorf("unsupported operating system %q", opts.OperatingSystem)
}
// Defaults if omitted by caller.
// TODO: Change this. Don't auto generate ID if omitted. Avoids the chicken-and-egg problem
if uvm.id == "" {
uvm.id = guid.New().String()
}
if uvm.owner == "" {
uvm.owner = filepath.Base(os.Args[0])
}
attachments := make(map[string]hcsschema.Attachment)
scsi := make(map[string]hcsschema.Scsi)
uvm.scsiControllerCount = 1
var actualRootFSType PreferredRootFSType = PreferredRootFSTypeInitRd // TODO Should we switch to VPMem/VHD as default?
if uvm.operatingSystem == "windows" {
if len(opts.LayerFolders) < 2 {
return nil, fmt.Errorf("at least 2 LayerFolders must be supplied")
}
if opts.VPMemDeviceCount != nil {
return nil, fmt.Errorf("cannot specify VPMemDeviceCount for Windows utility VMs")
}
var err error
uvmFolder, err = uvmfolder.LocateUVMFolder(opts.LayerFolders)
if err != nil {
return nil, fmt.Errorf("failed to locate utility VM folder from layer folders: %s", err)
}
// TODO: BUGBUG Remove this. @jhowardmsft
// It should be the responsiblity of the caller to do the creation and population.
// - Update runhcs too (vm.go).
// - Remove comment in function header
// - Update tests that rely on this current behaviour.
// Create the RW scratch in the top-most layer folder, creating the folder if it doesn't already exist.
scratchFolder := opts.LayerFolders[len(opts.LayerFolders)-1]
logrus.Debugf("uvm::createWCOW scratch folder: %s", scratchFolder)
// Create the directory if it doesn't exist
if _, err := os.Stat(scratchFolder); os.IsNotExist(err) {
logrus.Debugf("uvm::createWCOW Creating folder: %s ", scratchFolder)
if err := os.MkdirAll(scratchFolder, 0777); err != nil {
return nil, fmt.Errorf("failed to create utility VM scratch folder: %s", err)
}
}
// Create sandbox.vhdx in the scratch folder based on the template, granting the correct permissions to it
if _, err := os.Stat(filepath.Join(scratchFolder, `sandbox.vhdx`)); os.IsNotExist(err) {
if err := wcow.CreateUVMScratch(uvmFolder, scratchFolder, uvm.id); err != nil {
return nil, fmt.Errorf("failed to create scratch: %s", err)
}
}
// We attach the scratch to SCSI 0:0
attachments["0"] = hcsschema.Attachment{
Path: filepath.Join(scratchFolder, "sandbox.vhdx"),
Type_: "VirtualDisk",
}
scsi["0"] = hcsschema.Scsi{Attachments: attachments}
uvm.scsiLocations[0][0].hostPath = attachments["0"].Path
} else {
uvm.vpmemMax = DefaultVPMEM
if opts.VPMemDeviceCount != nil {
if *opts.VPMemDeviceCount > MaxVPMEM || *opts.VPMemDeviceCount < 0 {
return nil, fmt.Errorf("vpmem device count must between 0 and %d", MaxVPMEM)
}
uvm.vpmemMax = *opts.VPMemDeviceCount
logrus.Debugln("uvm::Create:: uvm.vpmemMax=", uvm.vpmemMax)
}
scsi["0"] = hcsschema.Scsi{Attachments: attachments}
uvm.scsiControllerCount = 1
if opts.SCSIControllerCount != nil {
if *opts.SCSIControllerCount < 0 || *opts.SCSIControllerCount > 1 {
return nil, fmt.Errorf("SCSI controller count must be 0 or 1") // Future extension here for up to 4
}
uvm.scsiControllerCount = *opts.SCSIControllerCount
if uvm.scsiControllerCount == 0 {
scsi = nil
}
logrus.Debugln("uvm::Create:: uvm.scsiControllerCount=", uvm.scsiControllerCount)
}
if opts.BootFilesPath == "" {
opts.BootFilesPath = filepath.Join(os.Getenv("ProgramFiles"), "Linux Containers")
}
if opts.KernelFile == "" {
opts.KernelFile = "kernel"
}
if _, err := os.Stat(filepath.Join(opts.BootFilesPath, opts.KernelFile)); os.IsNotExist(err) {
return nil, fmt.Errorf("kernel '%s' not found", filepath.Join(opts.BootFilesPath, opts.KernelFile))
}
if opts.RootFSFile == "" {
if opts.PreferredRootFSType != nil {
actualRootFSType = *opts.PreferredRootFSType
if actualRootFSType != PreferredRootFSTypeInitRd && actualRootFSType != PreferredRootFSTypeVHD {
return nil, fmt.Errorf("invalid PreferredRootFSType")
}
}
switch actualRootFSType {
case PreferredRootFSTypeInitRd:
if _, err := os.Stat(filepath.Join(opts.BootFilesPath, initrdFile)); os.IsNotExist(err) {
return nil, fmt.Errorf("initrd not found")
}
opts.RootFSFile = initrdFile
case PreferredRootFSTypeVHD:
if _, err := os.Stat(filepath.Join(opts.BootFilesPath, vhdFile)); os.IsNotExist(err) {
return nil, fmt.Errorf("rootfs.vhd not found")
}
opts.RootFSFile = vhdFile
}
} else {
// Determine the root FS type by the extension of the explicitly supplied RootFSFile
if _, err := os.Stat(filepath.Join(opts.BootFilesPath, opts.RootFSFile)); os.IsNotExist(err) {
return nil, fmt.Errorf("%s not found under %s", opts.RootFSFile, opts.BootFilesPath)
}
switch strings.ToLower(filepath.Ext(opts.RootFSFile)) {
case "vhd", "vhdx":
actualRootFSType = PreferredRootFSTypeVHD
case "img":
actualRootFSType = PreferredRootFSTypeInitRd
default:
return nil, fmt.Errorf("unsupported filename extension for RootFSFile")
}
}
}
memory := int32(1024)
processors := int32(2)
if runtime.NumCPU() == 1 {
processors = 1
}
if opts.Resources != nil {
if opts.Resources.Memory != nil && opts.Resources.Memory.Limit != nil {
memory = int32(*opts.Resources.Memory.Limit / 1024 / 1024) // OCI spec is in bytes. HCS takes MB
}
if opts.Resources.CPU != nil && opts.Resources.CPU.Count != nil {
processors = int32(*opts.Resources.CPU.Count)
}
}
vm := &hcsschema.VirtualMachine{
Chipset: &hcsschema.Chipset{
Uefi: &hcsschema.Uefi{},
},
ComputeTopology: &hcsschema.Topology{
Memory: &hcsschema.Memory2{
AllowOvercommit: true,
SizeInMB: memory,
},
Processor: &hcsschema.Processor2{
Count: processors,
},
},
GuestConnection: &hcsschema.GuestConnection{},
Devices: &hcsschema.Devices{
Scsi: scsi,
HvSocket: &hcsschema.HvSocket2{
HvSocketConfig: &hcsschema.HvSocketSystemConfig{
// Allow administrators and SYSTEM to bind to vsock sockets
// so that we can create a GCS log socket.
DefaultBindSecurityDescriptor: "D:P(A;;FA;;;SY)(A;;FA;;;BA)",
},
},
},
}
hcsDocument := &hcsschema.ComputeSystem{
Owner: uvm.owner,
SchemaVersion: schemaversion.SchemaV21(),
VirtualMachine: vm,
}
if uvm.operatingSystem == "windows" {
vm.Chipset.Uefi.BootThis = &hcsschema.UefiBootEntry{
DevicePath: `\EFI\Microsoft\Boot\bootmgfw.efi`,
DeviceType: "VmbFs",
}
vm.Devices.VirtualSmb = &hcsschema.VirtualSmb{
DirectFileMappingInMB: 1024, // Sensible default, but could be a tuning parameter somewhere
Shares: []hcsschema.VirtualSmbShare{
{
Name: "os",
Path: filepath.Join(uvmFolder, `UtilityVM\Files`),
Options: &hcsschema.VirtualSmbShareOptions{
ReadOnly: true,
PseudoOplocks: true,
TakeBackupPrivilege: true,
CacheIo: true,
ShareRead: true,
},
},
},
}
} else {
vmDebugging := false
vm.GuestConnection.UseVsock = true
vm.GuestConnection.UseConnectedSuspend = true
vm.Devices.VirtualSmb = &hcsschema.VirtualSmb{
Shares: []hcsschema.VirtualSmbShare{
{
Name: "os",
Path: opts.BootFilesPath,
Options: &hcsschema.VirtualSmbShareOptions{
ReadOnly: true,
TakeBackupPrivilege: true,
CacheIo: true,
ShareRead: true,
},
},
},
}
if uvm.vpmemMax > 0 {
vm.Devices.VirtualPMem = &hcsschema.VirtualPMemController{
MaximumCount: uvm.vpmemMax,
}
}
kernelArgs := "initrd=/" + opts.RootFSFile
if actualRootFSType == PreferredRootFSTypeVHD {
kernelArgs = "root=/dev/pmem0 init=/init"
}
// Support for VPMem VHD(X) booting rather than initrd..
if actualRootFSType == PreferredRootFSTypeVHD {
if uvm.vpmemMax == 0 {
return nil, fmt.Errorf("PreferredRootFSTypeVHD requess at least one VPMem device")
}
imageFormat := "Vhd1"
if strings.ToLower(filepath.Ext(opts.RootFSFile)) == "vhdx" {
imageFormat = "Vhdx"
}
vm.Devices.VirtualPMem.Devices = map[string]hcsschema.VirtualPMemDevice{
"0": {
HostPath: filepath.Join(opts.BootFilesPath, opts.RootFSFile),
ReadOnly: true,
ImageFormat: imageFormat,
},
}
if err := wclayer.GrantVmAccess(uvm.id, filepath.Join(opts.BootFilesPath, opts.RootFSFile)); err != nil {
return nil, fmt.Errorf("faied to grantvmaccess to %s: %s", filepath.Join(opts.BootFilesPath, opts.RootFSFile), err)
}
// Add to our internal structure
uvm.vpmemDevices[0] = vpmemInfo{
hostPath: opts.RootFSFile,
uvmPath: "/",
refCount: 1,
}
}
if opts.ConsolePipe != "" {
vmDebugging = true
kernelArgs += " console=ttyS0,115200"
vm.Devices.ComPorts = map[string]hcsschema.ComPort{
"0": { // Which is actually COM1
NamedPipe: opts.ConsolePipe,
},
}
}
if opts.EnableGraphicsConsole {
vmDebugging = true
kernelArgs += " console=tty"
vm.Devices.Keyboard = &hcsschema.Keyboard{}
vm.Devices.EnhancedModeVideo = &hcsschema.EnhancedModeVideo{}
vm.Devices.VideoMonitor = &hcsschema.VideoMonitor{}
}
if !vmDebugging {
// Terminate the VM if there is a kernel panic.
kernelArgs += " panic=-1"
}
if opts.KernelBootOptions != "" {
kernelArgs += " " + opts.KernelBootOptions
}
// Start GCS with stderr pointing to the vsock port created below in
// order to forward guest logs to logrus.
initArgs := fmt.Sprintf("/bin/vsockexec -e %d /bin/gcs -log-format json -loglevel %s",
linuxLogVsockPort,
logrus.StandardLogger().Level.String())
if vmDebugging {
// Launch a shell on the console.
initArgs = `sh -c "` + initArgs + ` & exec sh"`
}
kernelArgs += ` -- ` + initArgs
vm.Chipset.Uefi.BootThis = &hcsschema.UefiBootEntry{
DevicePath: `\` + opts.KernelFile,
DeviceType: "VmbFs",
OptionalData: kernelArgs,
}
}
fullDoc, err := mergemaps.MergeJSON(hcsDocument, ([]byte)(opts.AdditionHCSDocumentJSON))
if err != nil {
return nil, fmt.Errorf("failed to merge additional JSON '%s': %s", opts.AdditionHCSDocumentJSON, err)
}
hcsSystem, err := hcs.CreateComputeSystem(uvm.id, fullDoc)
if err != nil {
logrus.Debugln("failed to create UVM: ", err)
return nil, err
}
uvm.hcsSystem = hcsSystem
defer func() {
if err != nil {
uvm.Close()
}
}()
if uvm.operatingSystem == "linux" {
// Create a socket that the GCS can send logrus log data to.
uvm.gcslog, err = uvm.listenVsock(linuxLogVsockPort)
if err != nil {
return nil, err
}
}
return uvm, nil
}
func (uvm *UtilityVM) listenVsock(port uint32) (net.Listener, error) {
properties, err := uvm.hcsSystem.Properties()
if err != nil {
return nil, err
}
vmID, err := hvsock.GUIDFromString(properties.RuntimeID)
if err != nil {
return nil, err
}
serviceID, _ := hvsock.GUIDFromString("00000000-facb-11e6-bd58-64006a7986d3")
binary.LittleEndian.PutUint32(serviceID[0:4], port)
return hvsock.Listen(hvsock.Addr{VMID: vmID, ServiceID: serviceID})
}
// ID returns the ID of the VM's compute system.
func (uvm *UtilityVM) ID() string {
return uvm.hcsSystem.ID()
}
// OS returns the operating system of the utility VM.
func (uvm *UtilityVM) OS() string {
return uvm.operatingSystem
}
// Close terminates and releases resources associated with the utility VM.
func (uvm *UtilityVM) Close() error {
uvm.Terminate()
if uvm.gcslog != nil {
uvm.gcslog.Close()
uvm.gcslog = nil
}
err := uvm.hcsSystem.Close()
uvm.hcsSystem = nil
return err
}