diff --git a/virtcontainers/api.go b/virtcontainers/api.go index 8925f458f1..c3197c1c50 100644 --- a/virtcontainers/api.go +++ b/virtcontainers/api.go @@ -224,6 +224,10 @@ func StartSandbox(ctx context.Context, sandboxID string) (VCSandbox, error) { return nil, err } + if err = s.storeSandbox(); err != nil { + return nil, err + } + return s, nil } @@ -256,6 +260,10 @@ func StopSandbox(ctx context.Context, sandboxID string) (VCSandbox, error) { return nil, err } + if err = s.storeSandbox(); err != nil { + return nil, err + } + return s, nil } @@ -394,6 +402,10 @@ func CreateContainer(ctx context.Context, sandboxID string, containerConfig Cont return nil, nil, err } + if err = s.storeSandbox(); err != nil { + return nil, nil, err + } + return s, c, nil } diff --git a/virtcontainers/container.go b/virtcontainers/container.go index 2c7e8c0572..6030d73550 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -258,8 +258,18 @@ type SystemMountsInfo struct { type ContainerDevice struct { // ID is device id referencing the device from sandbox's device manager ID string + // ContainerPath is device path displayed in container ContainerPath string + + // FileMode permission bits for the device. + FileMode os.FileMode + + // UID is user ID in the container namespace + UID uint32 + + // GID is group ID in the container namespace + GID uint32 } // RootFs describes the container's rootfs. @@ -364,7 +374,14 @@ func (c *Container) SetPid(pid int) error { func (c *Container) setStateFstype(fstype string) error { c.state.Fstype = fstype - return c.storeState() + if !c.sandbox.supportNewStore() { + // experimental runtime use "persist.json" which doesn't need "state.json" anymore + if err := c.storeState(); err != nil { + return err + } + } + + return nil } // GetAnnotations returns container's annotations @@ -374,6 +391,11 @@ func (c *Container) GetAnnotations() map[string]string { // storeContainer stores a container config. func (c *Container) storeContainer() error { + if c.sandbox.supportNewStore() { + if err := c.sandbox.newStore.ToDisk(); err != nil { + return err + } + } return c.store.Store(store.Configuration, *(c.config)) } @@ -423,10 +445,17 @@ func (c *Container) setContainerState(state types.StateString) error { // update in-memory state c.state.State = state - // update on-disk state - err := c.store.Store(store.State, c.state) - if err != nil { - return err + if c.sandbox.supportNewStore() { + // flush data to storage + if err := c.sandbox.newStore.ToDisk(); err != nil { + return err + } + } else { + // experimental runtime use "persist.json" which doesn't need "state.json" anymore + // update on-disk state + if err := c.store.Store(store.State, c.state); err != nil { + return err + } } return nil @@ -588,9 +617,9 @@ func (c *Container) unmountHostMounts() error { return nil } -func filterDevices(sandbox *Sandbox, c *Container, devices []ContainerDevice) (ret []ContainerDevice) { +func filterDevices(c *Container, devices []ContainerDevice) (ret []ContainerDevice) { for _, dev := range devices { - major, _ := sandbox.devManager.GetDeviceByID(dev.ID).GetMajorMinor() + major, _ := c.sandbox.devManager.GetDeviceByID(dev.ID).GetMajorMinor() if _, ok := cdromMajors[major]; ok { c.Logger().WithFields(logrus.Fields{ "device": dev.ContainerPath, @@ -670,9 +699,22 @@ func newContainer(sandbox *Sandbox, contConfig ContainerConfig) (*Container, err c.store = ctrStore - state, err := c.store.LoadContainerState() - if err == nil { - c.state = state + // experimental runtime use "persist.json" instead of legacy "state.json" as storage + if c.sandbox.supportNewStore() { + if err := c.Restore(); err != nil && + !os.IsNotExist(err) && err != errContainerPersistNotExist { + return nil, err + } + } else { + state, err := c.store.LoadContainerState() + if err == nil { + c.state = state + } + } + + if err := c.Restore(); err != nil && + !os.IsNotExist(err) && err != errContainerPersistNotExist { + return nil, err } var process Process @@ -680,6 +722,18 @@ func newContainer(sandbox *Sandbox, contConfig ContainerConfig) (*Container, err c.process = process } + if err = c.createMounts(); err != nil { + return nil, err + } + + if err = c.createDevices(contConfig); err != nil { + return nil, err + } + + return c, nil +} + +func (c *Container) createMounts() error { mounts, err := c.loadMounts() if err == nil { // restore mounts from disk @@ -687,10 +741,14 @@ func newContainer(sandbox *Sandbox, contConfig ContainerConfig) (*Container, err } else { // Create block devices for newly created container if err := c.createBlockDevices(); err != nil { - return nil, err + return err } } + return nil +} + +func (c *Container) createDevices(contConfig ContainerConfig) error { // Devices will be found in storage after create stage has completed. // We load devices from storage at all other stages. storedDevices, err := c.loadDevices() @@ -700,20 +758,22 @@ func newContainer(sandbox *Sandbox, contConfig ContainerConfig) (*Container, err // If devices were not found in storage, create Device implementations // from the configuration. This should happen at create. for _, info := range contConfig.DeviceInfos { - dev, err := sandbox.devManager.NewDevice(info) + dev, err := c.sandbox.devManager.NewDevice(info) if err != nil { - return &Container{}, err + return err } storedDevices = append(storedDevices, ContainerDevice{ ID: dev.DeviceID(), ContainerPath: info.ContainerPath, + FileMode: info.FileMode, + UID: info.UID, + GID: info.GID, }) } - c.devices = filterDevices(sandbox, c, storedDevices) + c.devices = filterDevices(c, storedDevices) } - - return c, nil + return nil } // rollbackFailingContainerCreation rolls back important steps that might have diff --git a/virtcontainers/container_test.go b/virtcontainers/container_test.go index 5e28e5e29a..333d2e4889 100644 --- a/virtcontainers/container_test.go +++ b/virtcontainers/container_test.go @@ -20,6 +20,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/config" "github.com/kata-containers/runtime/virtcontainers/device/drivers" "github.com/kata-containers/runtime/virtcontainers/device/manager" + "github.com/kata-containers/runtime/virtcontainers/persist" "github.com/kata-containers/runtime/virtcontainers/store" "github.com/kata-containers/runtime/virtcontainers/types" "github.com/stretchr/testify/assert" @@ -236,6 +237,10 @@ func TestContainerAddDriveDir(t *testing.T) { assert.Nil(t, err) sandbox.store = sandboxStore + if sandbox.newStore, err = persist.GetDriver("fs"); err != nil || sandbox.newStore == nil { + t.Fatalf("failed to get fs persist driver") + } + contID := "100" container := Container{ sandbox: sandbox, diff --git a/virtcontainers/device/api/interface.go b/virtcontainers/device/api/interface.go index da15831aef..560a40d1b3 100644 --- a/virtcontainers/device/api/interface.go +++ b/virtcontainers/device/api/interface.go @@ -10,6 +10,7 @@ import ( "github.com/sirupsen/logrus" "github.com/kata-containers/runtime/virtcontainers/device/config" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" ) var devLogger = logrus.WithField("subsystem", "device") @@ -63,6 +64,9 @@ type Device interface { Reference() uint // Dereference removes one reference to device then returns final ref count Dereference() uint + + // Persist convert and return data in persist format + Dump() persistapi.DeviceState } // DeviceManager can be used to create a new device, this can be used as single diff --git a/virtcontainers/device/drivers/block.go b/virtcontainers/device/drivers/block.go index fdb18f4187..9897118ade 100644 --- a/virtcontainers/device/drivers/block.go +++ b/virtcontainers/device/drivers/block.go @@ -11,6 +11,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/utils" ) @@ -146,5 +147,27 @@ func (device *BlockDevice) GetDeviceInfo() interface{} { return device.BlockDrive } +// Dump convert and return data in persist format +func (device *BlockDevice) Dump() persistapi.DeviceState { + ds := device.GenericDevice.Dump() + ds.Type = string(device.DeviceType()) + + drive := device.BlockDrive + if drive != nil { + ds.BlockDrive = &persistapi.BlockDrive{ + File: drive.File, + Format: drive.Format, + ID: drive.ID, + Index: drive.Index, + MmioAddr: drive.MmioAddr, + PCIAddr: drive.PCIAddr, + SCSIAddr: drive.SCSIAddr, + NvdimmID: drive.NvdimmID, + VirtPath: drive.VirtPath, + } + } + return ds +} + // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes diff --git a/virtcontainers/device/drivers/generic.go b/virtcontainers/device/drivers/generic.go index ff60fc3fb4..d01014f67f 100644 --- a/virtcontainers/device/drivers/generic.go +++ b/virtcontainers/device/drivers/generic.go @@ -11,6 +11,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" ) // GenericDevice refers to a device that is neither a VFIO device or block device. @@ -115,3 +116,22 @@ func (device *GenericDevice) bumpAttachCount(attach bool) (skip bool, err error) } } } + +// Dump convert and return data in persist format +func (device *GenericDevice) Dump() persistapi.DeviceState { + dss := persistapi.DeviceState{ + ID: device.ID, + Type: string(device.DeviceType()), + RefCount: device.RefCount, + AttachCount: device.AttachCount, + } + + info := device.DeviceInfo + if info != nil { + dss.DevType = info.DevType + dss.Major = info.Major + dss.Minor = info.Minor + dss.DriverOptions = info.DriverOptions + } + return dss +} diff --git a/virtcontainers/device/drivers/vfio.go b/virtcontainers/device/drivers/vfio.go index f717692258..bea10dc2d9 100644 --- a/virtcontainers/device/drivers/vfio.go +++ b/virtcontainers/device/drivers/vfio.go @@ -17,6 +17,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/utils" ) @@ -139,6 +140,25 @@ func (device *VFIODevice) GetDeviceInfo() interface{} { return device.VfioDevs } +// Dump convert and return data in persist format +func (device *VFIODevice) Dump() persistapi.DeviceState { + ds := device.GenericDevice.Dump() + ds.Type = string(device.DeviceType()) + + devs := device.VfioDevs + for _, dev := range devs { + if dev != nil { + ds.VFIODevs = append(ds.VFIODevs, &persistapi.VFIODev{ + ID: dev.ID, + Type: string(dev.Type), + BDF: dev.BDF, + SysfsDev: dev.SysfsDev, + }) + } + } + return ds +} + // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes diff --git a/virtcontainers/device/drivers/vhost_user_blk.go b/virtcontainers/device/drivers/vhost_user_blk.go index 78b2885dc1..588c46590b 100644 --- a/virtcontainers/device/drivers/vhost_user_blk.go +++ b/virtcontainers/device/drivers/vhost_user_blk.go @@ -11,6 +11,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/utils" ) @@ -71,5 +72,18 @@ func (device *VhostUserBlkDevice) GetDeviceInfo() interface{} { return &device.VhostUserDeviceAttrs } +// Dump convert and return data in persist format +func (device *VhostUserBlkDevice) Dump() persistapi.DeviceState { + ds := device.GenericDevice.Dump() + ds.Type = string(device.DeviceType()) + ds.VhostUserDev = &persistapi.VhostUserDeviceAttrs{ + DevID: device.DevID, + SocketPath: device.SocketPath, + Type: string(device.Type), + MacAddress: device.MacAddress, + } + return ds +} + // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes diff --git a/virtcontainers/device/drivers/vhost_user_net.go b/virtcontainers/device/drivers/vhost_user_net.go index a276bb6fe5..8f5abb32e2 100644 --- a/virtcontainers/device/drivers/vhost_user_net.go +++ b/virtcontainers/device/drivers/vhost_user_net.go @@ -11,6 +11,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/utils" ) @@ -72,5 +73,18 @@ func (device *VhostUserNetDevice) GetDeviceInfo() interface{} { return &device.VhostUserDeviceAttrs } +// Dump convert and return data in persist format +func (device *VhostUserNetDevice) Dump() persistapi.DeviceState { + ds := device.GenericDevice.Dump() + ds.Type = string(device.DeviceType()) + ds.VhostUserDev = &persistapi.VhostUserDeviceAttrs{ + DevID: device.DevID, + SocketPath: device.SocketPath, + Type: string(device.Type), + MacAddress: device.MacAddress, + } + return ds +} + // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes diff --git a/virtcontainers/device/drivers/vhost_user_scsi.go b/virtcontainers/device/drivers/vhost_user_scsi.go index e4e2a27afa..543949ceb4 100644 --- a/virtcontainers/device/drivers/vhost_user_scsi.go +++ b/virtcontainers/device/drivers/vhost_user_scsi.go @@ -11,6 +11,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/api" "github.com/kata-containers/runtime/virtcontainers/device/config" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/utils" ) @@ -72,5 +73,18 @@ func (device *VhostUserSCSIDevice) GetDeviceInfo() interface{} { return &device.VhostUserDeviceAttrs } +// Dump convert and return data in persist format +func (device *VhostUserSCSIDevice) Dump() persistapi.DeviceState { + ds := device.GenericDevice.Dump() + ds.Type = string(device.DeviceType()) + ds.VhostUserDev = &persistapi.VhostUserDeviceAttrs{ + DevID: device.DevID, + SocketPath: device.SocketPath, + Type: string(device.Type), + MacAddress: device.MacAddress, + } + return ds +} + // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes diff --git a/virtcontainers/mount_test.go b/virtcontainers/mount_test.go index 2edc87e4a7..71d109483a 100644 --- a/virtcontainers/mount_test.go +++ b/virtcontainers/mount_test.go @@ -225,7 +225,7 @@ func TestGetDeviceForPathBindMount(t *testing.T) { t.Fatal(err) } - defer syscall.Unmount(dest, 0) + defer syscall.Unmount(dest, syscall.MNT_DETACH) destFile := filepath.Join(dest, "test") _, err = os.Create(destFile) diff --git a/virtcontainers/persist.go b/virtcontainers/persist.go new file mode 100644 index 0000000000..b6bc5814e9 --- /dev/null +++ b/virtcontainers/persist.go @@ -0,0 +1,171 @@ +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "errors" + + "github.com/kata-containers/runtime/virtcontainers/device/api" + exp "github.com/kata-containers/runtime/virtcontainers/experimental" + "github.com/kata-containers/runtime/virtcontainers/persist" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" + "github.com/kata-containers/runtime/virtcontainers/types" +) + +var ( + errContainerPersistNotExist = errors.New("container doesn't exist in persist data") +) + +func (s *Sandbox) dumpState(ss *persistapi.SandboxState, cs map[string]persistapi.ContainerState) error { + ss.SandboxContainer = s.id + ss.GuestMemoryBlockSizeMB = s.state.GuestMemoryBlockSizeMB + ss.State = string(s.state.State) + ss.CgroupPath = s.state.CgroupPath + + for id, cont := range s.containers { + state := persistapi.ContainerState{} + if v, ok := cs[id]; ok { + state = v + } + state.State = string(cont.state.State) + state.Rootfs = persistapi.RootfsState{ + BlockDeviceID: cont.state.BlockDeviceID, + FsType: cont.state.Fstype, + } + state.CgroupPath = cont.state.CgroupPath + cs[id] = state + } + + // delete removed containers + for id := range cs { + if _, ok := s.containers[id]; !ok { + delete(cs, id) + } + } + + return nil +} + +func (s *Sandbox) dumpHypervisor(ss *persistapi.SandboxState, cs map[string]persistapi.ContainerState) error { + ss.HypervisorState.BlockIndex = s.state.BlockIndex + return nil +} + +func deviceToDeviceState(devices []api.Device) (dss []persistapi.DeviceState) { + for _, dev := range devices { + dss = append(dss, dev.Dump()) + } + return +} + +func (s *Sandbox) dumpDevices(ss *persistapi.SandboxState, cs map[string]persistapi.ContainerState) error { + ss.Devices = deviceToDeviceState(s.devManager.GetAllDevices()) + + for id, cont := range s.containers { + state := persistapi.ContainerState{} + if v, ok := cs[id]; ok { + state = v + } + + state.DeviceMaps = nil + for _, dev := range cont.devices { + state.DeviceMaps = append(state.DeviceMaps, persistapi.DeviceMap{ + ID: dev.ID, + ContainerPath: dev.ContainerPath, + FileMode: dev.FileMode, + UID: dev.UID, + GID: dev.GID, + }) + } + + cs[id] = state + } + + // delete removed containers + for id := range cs { + if _, ok := s.containers[id]; !ok { + delete(cs, id) + } + } + + return nil +} + +// verSaveCallback set persist data version to current version in runtime +func (s *Sandbox) verSaveCallback() { + s.newStore.AddSaveCallback("version", func(ss *persistapi.SandboxState, cs map[string]persistapi.ContainerState) error { + ss.PersistVersion = persistapi.CurPersistVersion + return nil + }) +} + +// stateSaveCallback register hook to set sandbox and container state to persist +func (s *Sandbox) stateSaveCallback() { + s.newStore.AddSaveCallback("state", s.dumpState) +} + +// hvStateSaveCallback register hook to save hypervisor state to persist data +func (s *Sandbox) hvStateSaveCallback() { + s.newStore.AddSaveCallback("hypervisor", s.dumpHypervisor) +} + +// PersistDevices register hook to save device informations +func (s *Sandbox) devicesSaveCallback() { + s.newStore.AddSaveCallback("devices", s.dumpDevices) +} + +func (s *Sandbox) getSbxAndCntStates() (*persistapi.SandboxState, map[string]persistapi.ContainerState, error) { + if err := s.newStore.FromDisk(s.id); err != nil { + return nil, nil, err + } + + return s.newStore.GetStates() +} + +// Restore will restore sandbox data from persist file on disk +func (s *Sandbox) Restore() error { + ss, _, err := s.getSbxAndCntStates() + if err != nil { + return err + } + + s.state.GuestMemoryBlockSizeMB = ss.GuestMemoryBlockSizeMB + s.state.BlockIndex = ss.HypervisorState.BlockIndex + s.state.State = types.StateString(ss.State) + s.state.CgroupPath = ss.CgroupPath + + return nil +} + +// Restore will restore container data from persist file on disk +func (c *Container) Restore() error { + _, cs, err := c.sandbox.getSbxAndCntStates() + if err != nil { + return err + } + + if _, ok := cs[c.id]; !ok { + return errContainerPersistNotExist + } + + c.state = types.ContainerState{ + State: types.StateString(cs[c.id].State), + BlockDeviceID: cs[c.id].Rootfs.BlockDeviceID, + Fstype: cs[c.id].Rootfs.FsType, + CgroupPath: cs[c.id].CgroupPath, + } + + return nil +} + +func (s *Sandbox) supportNewStore() bool { + for _, f := range s.config.Experimental { + if f == persist.NewStoreFeature && exp.Get("newstore") != nil { + return true + } + } + return false +} diff --git a/virtcontainers/persist/api/config.go b/virtcontainers/persist/api/config.go new file mode 100644 index 0000000000..3da8c5e731 --- /dev/null +++ b/virtcontainers/persist/api/config.go @@ -0,0 +1,227 @@ +// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persistapi + +// Param is a key/value representation for hypervisor and kernel parameters. +type Param struct { + Key string + Value string +} + +// Asset saves hypervisor asset +type Asset struct { + Path string `json:"path"` + Custom bool `json:"bool"` +} + +// HypervisorConfig saves configurations of sandbox hypervisor +type HypervisorConfig struct { + // NumVCPUs specifies default number of vCPUs for the VM. + NumVCPUs uint32 + + //DefaultMaxVCPUs specifies the maximum number of vCPUs for the VM. + DefaultMaxVCPUs uint32 + + // DefaultMem specifies default memory size in MiB for the VM. + MemorySize uint32 + + // DefaultBridges specifies default number of bridges for the VM. + // Bridges can be used to hot plug devices + DefaultBridges uint32 + + // Msize9p is used as the msize for 9p shares + Msize9p uint32 + + // MemSlots specifies default memory slots the VM. + MemSlots uint32 + + // MemOffset specifies memory space for nvdimm device + MemOffset uint32 + + // KernelParams are additional guest kernel parameters. + KernelParams []Param + + // HypervisorParams are additional hypervisor parameters. + HypervisorParams []Param + + // KernelPath is the guest kernel host path. + KernelPath string + + // ImagePath is the guest image host path. + ImagePath string + + // InitrdPath is the guest initrd image host path. + // ImagePath and InitrdPath cannot be set at the same time. + InitrdPath string + + // FirmwarePath is the bios host path + FirmwarePath string + + // MachineAccelerators are machine specific accelerators + MachineAccelerators string + + // HypervisorPath is the hypervisor executable host path. + HypervisorPath string + + // BlockDeviceDriver specifies the driver to be used for block device + // either VirtioSCSI or VirtioBlock with the default driver being defaultBlockDriver + BlockDeviceDriver string + + // HypervisorMachineType specifies the type of machine being + // emulated. + HypervisorMachineType string + + // MemoryPath is the memory file path of VM memory. Used when either BootToBeTemplate or + // BootFromTemplate is true. + MemoryPath string + + // DevicesStatePath is the VM device state file path. Used when either BootToBeTemplate or + // BootFromTemplate is true. + DevicesStatePath string + + // EntropySource is the path to a host source of + // entropy (/dev/random, /dev/urandom or real hardware RNG device) + EntropySource string + + // customAssets is a map of assets. + // Each value in that map takes precedence over the configured assets. + // For example, if there is a value for the "kernel" key in this map, + // it will be used for the sandbox's kernel path instead of KernelPath. + CustomAssets map[string]*Asset + + // BlockDeviceCacheSet specifies cache-related options will be set to block devices or not. + BlockDeviceCacheSet bool + + // BlockDeviceCacheDirect specifies cache-related options for block devices. + // Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. + BlockDeviceCacheDirect bool + + // BlockDeviceCacheNoflush specifies cache-related options for block devices. + // Denotes whether flush requests for the device are ignored. + BlockDeviceCacheNoflush bool + + // DisableBlockDeviceUse disallows a block device from being used. + DisableBlockDeviceUse bool + + // EnableIOThreads enables IO to be processed in a separate thread. + // Supported currently for virtio-scsi driver. + EnableIOThreads bool + + // Debug changes the default hypervisor and kernel parameters to + // enable debug output where available. + Debug bool + + // MemPrealloc specifies if the memory should be pre-allocated + MemPrealloc bool + + // HugePages specifies if the memory should be pre-allocated from huge pages + HugePages bool + + // Realtime Used to enable/disable realtime + Realtime bool + + // Mlock is used to control memory locking when Realtime is enabled + // Realtime=true and Mlock=false, allows for swapping out of VM memory + // enabling higher density + Mlock bool + + // DisableNestingChecks is used to override customizations performed + // when running on top of another VMM. + DisableNestingChecks bool + + // UseVSock use a vsock for agent communication + UseVSock bool + + // HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the + // root bus instead of a bridge. + HotplugVFIOOnRootBus bool + + // BootToBeTemplate used to indicate if the VM is created to be a template VM + BootToBeTemplate bool + + // BootFromTemplate used to indicate if the VM should be created from a template VM + BootFromTemplate bool + + // DisableVhostNet is used to indicate if host supports vhost_net + DisableVhostNet bool + + // GuestHookPath is the path within the VM that will be used for 'drop-in' hooks + GuestHookPath string +} + +// KataAgentConfig is a structure storing information needed +// to reach the Kata Containers agent. +type KataAgentConfig struct { + LongLiveConn bool + UseVSock bool +} + +// HyperstartConfig is a structure storing information needed for +// hyperstart agent initialization. +type HyperstartConfig struct { + SockCtlName string + SockTtyName string +} + +// ProxyConfig is a structure storing information needed from any +// proxy in order to be properly initialized. +type ProxyConfig struct { + Path string + Debug bool +} + +// ShimConfig is the structure providing specific configuration +// for shim implementation. +type ShimConfig struct { + Path string + Debug bool +} + +// NetworkConfig is the network configuration related to a network. +type NetworkConfig struct { +} + +// SandboxConfig is a sandbox configuration. +// Refs: virtcontainers/sandbox.go:SandboxConfig +type SandboxConfig struct { + HypervisorType string + HypervisorConfig HypervisorConfig + + // only one agent config can be non-nil according to agent type + AgentType string + KataAgentConfig *KataAgentConfig `json:",omitempty"` + HyperstartConfig *HyperstartConfig `json:",omitempty"` + + ProxyType string + ProxyConfig ProxyConfig + + ShimType string + KataShimConfig ShimConfig + + NetworkModel string + NetworkConfig NetworkConfig + + ShmSize uint64 + + // SharePidNs sets all containers to share the same sandbox level pid namespace. + SharePidNs bool + + // Stateful keeps sandbox resources in memory across APIs. Users will be responsible + // for calling Release() to release the memory resources. + Stateful bool + + // SystemdCgroup enables systemd cgroup support + SystemdCgroup bool + + // Experimental enables experimental features + Experimental bool + + // Information for fields not saved: + // * Annotation: this is kind of casual data, we don't need casual data in persist file, + // if you know this data needs to persist, please gives it + // a specific field +} diff --git a/virtcontainers/persist/api/container.go b/virtcontainers/persist/api/container.go new file mode 100644 index 0000000000..6b98323476 --- /dev/null +++ b/virtcontainers/persist/api/container.go @@ -0,0 +1,112 @@ +// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persistapi + +import ( + "os" + "time" +) + +// ============= container level resources ============= + +// DeviceMap saves how host device maps to container device +// one hypervisor device can be +// Refs: virtcontainers/container.go:ContainerDevice +type DeviceMap struct { + // ID reference to VM device + ID string + + // ContainerPath is device path displayed in container + ContainerPath string + + // FileMode permission bits for the device. + FileMode os.FileMode + + // UID is user ID in the container namespace + UID uint32 + + // GID is group ID in the container namespace + GID uint32 +} + +// Mount describes a container mount. +type Mount struct { + Source string + Destination string + + // Type specifies the type of filesystem to mount. + Type string + + // Options list all the mount options of the filesystem. + Options []string + + // HostPath used to store host side bind mount path + HostPath string + + // ReadOnly specifies if the mount should be read only or not + ReadOnly bool + + // BlockDeviceID represents block device that is attached to the + // VM in case this mount is a block device file or a directory + // backed by a block device. + BlockDeviceID string +} + +// RootfsState saves state of container rootfs +type RootfsState struct { + // BlockDeviceID represents container rootfs block device ID + // when backed by devicemapper + BlockDeviceID string + + // RootFStype is file system of the rootfs incase it is block device + FsType string +} + +// Process gathers data related to a container process. +// Refs: virtcontainers/container.go:Process +type Process struct { + // Token is the process execution context ID. It must be + // unique per sandbox. + // Token is used to manipulate processes for containers + // that have not started yet, and later identify them + // uniquely within a sandbox. + Token string + + // Pid is the process ID as seen by the host software + // stack, e.g. CRI-O, containerd. This is typically the + // shim PID. + Pid int + + StartTime time.Time +} + +// ContainerState represents container state +type ContainerState struct { + // State is container running status + State string + + // Rootfs contains information of container rootfs + Rootfs RootfsState + + // CgroupPath is the cgroup hierarchy where sandbox's processes + // including the hypervisor are placed. + CgroupPath string + + // DeviceMaps is mapping between sandbox device to dest in container + DeviceMaps []DeviceMap + + // Mounts is mount info from OCI spec + Mounts []Mount + + // Process on host representing container process + // FIXME: []Process or Process ? + Process []Process + + // BundlePath saves container OCI config.json, which can be unmarshaled + // and translated to "CompatOCISpec" + BundlePath string +} diff --git a/virtcontainers/persist/api/device.go b/virtcontainers/persist/api/device.go new file mode 100644 index 0000000000..42d9acb5f3 --- /dev/null +++ b/virtcontainers/persist/api/device.go @@ -0,0 +1,106 @@ +// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persistapi + +// ============= sandbox level resources ============= + +// BlockDrive represents a block storage drive which may be used in case the storage +// driver has an underlying block storage device. +type BlockDrive struct { + // File is the path to the disk-image/device which will be used with this drive + File string + + // Format of the drive + Format string + + // ID is used to identify this drive in the hypervisor options. + ID string + + // Index assigned to the drive. In case of virtio-scsi, this is used as SCSI LUN index + Index int + + // MmioAddr is used to identify the slot at which the drive is attached (order?). + MmioAddr string + + // PCIAddr is the PCI address used to identify the slot at which the drive is attached. + PCIAddr string + + // SCSI Address of the block device, in case the device is attached using SCSI driver + // SCSI address is in the format SCSI-Id:LUN + SCSIAddr string + + // NvdimmID is the nvdimm id inside the VM + NvdimmID string + + // VirtPath at which the device appears inside the VM, outside of the container mount namespace + VirtPath string +} + +// VFIODev represents a VFIO drive used for hotplugging +type VFIODev struct { + // ID is used to identify this drive in the hypervisor options. + ID string + + // Type of VFIO device + Type string + + // BDF (Bus:Device.Function) of the PCI address + BDF string + + // Sysfsdev of VFIO mediated device + SysfsDev string +} + +// VhostUserDeviceAttrs represents data shared by most vhost-user devices +type VhostUserDeviceAttrs struct { + DevID string + SocketPath string + Type string + + // MacAddress is only meaningful for vhost user net device + MacAddress string +} + +// DeviceState is sandbox level resource which represents host devices +// plugged to hypervisor, one Device can be shared among containers in POD +// Refs: virtcontainers/device/drivers/generic.go:GenericDevice +type DeviceState struct { + ID string + + // Type is used to specify driver type + // Refs: virtcontainers/device/config/config.go:DeviceType + Type string + + RefCount uint + AttachCount uint + + // Type of device: c, b, u or p + // c , u - character(unbuffered) + // p - FIFO + // b - block(buffered) special file + // More info in mknod(1). + DevType string + + // Major, minor numbers for device. + Major int64 + Minor int64 + + // DriverOptions is specific options for each device driver + // for example, for BlockDevice, we can set DriverOptions["blockDriver"]="virtio-blk" + DriverOptions map[string]string + + // ============ device driver specific data =========== + // BlockDrive is specific for block device driver + BlockDrive *BlockDrive `json:",omitempty"` + + // VFIODev is specific VFIO device driver + VFIODevs []*VFIODev `json:",omitempty"` + + // VhostUserDeviceAttrs is specific for vhost-user device driver + VhostUserDev *VhostUserDeviceAttrs `json:",omitempty"` + // ============ end device driver specific data =========== +} diff --git a/virtcontainers/persist/api/interface.go b/virtcontainers/persist/api/interface.go new file mode 100644 index 0000000000..0b94a61a66 --- /dev/null +++ b/virtcontainers/persist/api/interface.go @@ -0,0 +1,23 @@ +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persistapi + +// PersistDriver is interface describing operations to save/restore persist data +type PersistDriver interface { + // ToDisk flushes data to disk(or other storage media such as a remote db) + ToDisk() error + // FromDisk will restore all data for sandbox with `sid` from storage. + // We only support get data for one whole sandbox + FromDisk(sid string) error + // AddSaveCallback addes callback function named `name` to driver storage list + // The callback functions will be invoked when calling `ToDisk()`, notice that + // callback functions are not order guaranteed, + AddSaveCallback(name string, f SetFunc) + // Destroy will remove everything from storage + Destroy() error + // GetStates will return SandboxState and ContainerState(s) directly + GetStates() (*SandboxState, map[string]ContainerState, error) +} diff --git a/virtcontainers/persist/api/network.go b/virtcontainers/persist/api/network.go new file mode 100644 index 0000000000..c9744682db --- /dev/null +++ b/virtcontainers/persist/api/network.go @@ -0,0 +1,32 @@ +// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persistapi + +// ============= sandbox level resources ============= + +// NetworkEndpoint contains network interface information +type NetworkEndpoint struct { + Type string + + // ID used to pass the netdev option to qemu + ID string + + // Name of the interface + Name string + + // Index of interface + Index int +} + +// NetworkInfo contains network information of sandbox +type NetworkInfo struct { + NetNsPath string + NetmonPID int + NetNsCreated bool + InterworkingModel string + Endpoints []NetworkEndpoint +} diff --git a/virtcontainers/persist/api/sandbox.go b/virtcontainers/persist/api/sandbox.go new file mode 100644 index 0000000000..da17276106 --- /dev/null +++ b/virtcontainers/persist/api/sandbox.go @@ -0,0 +1,95 @@ +// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persistapi + +// ============= sandbox level resources ============= + +// SetFunc is function hook used for setting sandbox/container state +// It can be registered to dynamically set state files when dump +type SetFunc (func(*SandboxState, map[string]ContainerState) error) + +// Bridge is a bridge where devices can be hot plugged +type Bridge struct { + // Address contains information about devices plugged and its address in the bridge + DeviceAddr map[uint32]string + + // Type is the type of the bridge (pci, pcie, etc) + Type string + + //ID is used to identify the bridge in the hypervisor + ID string + + // Addr is the PCI/e slot of the bridge + Addr int +} + +// CPUDevice represents a CPU device which was hot-added in a running VM +type CPUDevice struct { + // ID is used to identify this CPU in the hypervisor options. + ID string +} + +// HypervisorState saves state of hypervisor +// Refs: virtcontainers/qemu.go:QemuState +type HypervisorState struct { + Pid int + Bridges []Bridge + // HotpluggedCPUs is the list of CPUs that were hot-added + HotpluggedVCPUs []CPUDevice + HotpluggedMemory int + UUID string + HotplugVFIOOnRootBus bool + BlockIndex int +} + +// ProxyState save proxy state data +type ProxyState struct { + // Pid of proxy process + Pid int + + // URL to connect to proxy + URL string +} + +// SandboxState contains state information of sandbox +// nolint: maligned +type SandboxState struct { + // PersistVersion of persist data format, can be used for keeping compatibility later + PersistVersion uint + + // State is sandbox running status + State string + + // GuestMemoryBlockSizeMB is the size of memory block of guestos + GuestMemoryBlockSizeMB uint32 + + // GuestMemoryHotplugProbe determines whether guest kernel supports memory hotplug probe interface + GuestMemoryHotplugProbe bool + + // SandboxContainer specifies which container is used to start the sandbox/vm + SandboxContainer string + + // CgroupPath is the cgroup hierarchy where sandbox's processes + // including the hypervisor are placed. + // FIXME: sandbox can reuse "SandboxContainer"'s CgroupPath so we can remove this field. + CgroupPath string + + // Devices plugged to sandbox(hypervisor) + Devices []DeviceState + + // HypervisorState saves hypervisor specific data + HypervisorState HypervisorState + + // ProxyState saves state data of proxy process + ProxyState ProxyState + + // Network saves network configuration of sandbox + Network NetworkInfo + + // Config saves config information of sandbox + Config SandboxConfig +} diff --git a/virtcontainers/persist/api/version.go b/virtcontainers/persist/api/version.go new file mode 100644 index 0000000000..9575af7a57 --- /dev/null +++ b/virtcontainers/persist/api/version.go @@ -0,0 +1,19 @@ +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persistapi + +const ( + // CurPersistVersion is current persist data version. + // This can help keep backward compatibility, if you make + // some changes in persistapi package which needs different + // handling process between different runtime versions, you + // should modify `CurPersistVersion` and handle persist data + // according to it. + // If you can't be sure if the change in persistapi package + // requires a bump of CurPersistVersion or not, do it for peace! + // --@WeiZhang555 + CurPersistVersion uint = 1 +) diff --git a/virtcontainers/persist/fs/fs.go b/virtcontainers/persist/fs/fs.go new file mode 100644 index 0000000000..df3bc5b357 --- /dev/null +++ b/virtcontainers/persist/fs/fs.go @@ -0,0 +1,274 @@ +// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package fs + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "syscall" + + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" + "github.com/sirupsen/logrus" +) + +// persistFile is the file name for JSON sandbox/container configuration +const persistFile = "persist.json" + +// dirMode is the permission bits used for creating a directory +const dirMode = os.FileMode(0700) + +// fileMode is the permission bits used for creating a file +const fileMode = os.FileMode(0640) + +// storagePathSuffix is the suffix used for all storage paths +// +// Note: this very brief path represents "virtcontainers". It is as +// terse as possible to minimise path length. +const storagePathSuffix = "vc" + +// sandboxPathSuffix is the suffix used for sandbox storage +const sandboxPathSuffix = "sbs" + +// runStoragePath is the sandbox runtime directory. +// It will contain one state.json and one lock file for each created sandbox. +var runStoragePath = filepath.Join("/run", storagePathSuffix, sandboxPathSuffix) + +// FS storage driver implementation +type FS struct { + sandboxState *persistapi.SandboxState + containerState map[string]persistapi.ContainerState + setFuncs map[string]persistapi.SetFunc + + lockFile *os.File +} + +var fsLog = logrus.WithField("source", "virtcontainers/persist/fs") + +// Logger returns a logrus logger appropriate for logging Store messages +func (fs *FS) Logger() *logrus.Entry { + return fsLog.WithFields(logrus.Fields{ + "subsystem": "persist", + }) +} + +// Name returns driver name +func Name() string { + return "fs" +} + +// Init FS persist driver and return abstract PersistDriver +func Init() (persistapi.PersistDriver, error) { + return &FS{ + sandboxState: &persistapi.SandboxState{}, + containerState: make(map[string]persistapi.ContainerState), + setFuncs: make(map[string]persistapi.SetFunc), + }, nil +} + +func (fs *FS) sandboxDir() (string, error) { + id := fs.sandboxState.SandboxContainer + if id == "" { + return "", fmt.Errorf("sandbox container id required") + } + + return filepath.Join(runStoragePath, id), nil +} + +// ToDisk sandboxState and containerState to disk +func (fs *FS) ToDisk() (retErr error) { + // call registered hooks to set sandboxState and containerState + for _, fun := range fs.setFuncs { + fun(fs.sandboxState, fs.containerState) + } + + sandboxDir, err := fs.sandboxDir() + if err != nil { + return err + } + + if err := os.MkdirAll(sandboxDir, dirMode); err != nil { + return err + } + + if err := fs.lock(); err != nil { + return err + } + defer fs.unlock() + + // if error happened, destroy all dirs + defer func() { + if retErr != nil { + if err := fs.Destroy(); err != nil { + fs.Logger().WithError(err).Errorf("failed to destroy dirs") + } + } + }() + + // persist sandbox configuration data + sandboxFile := filepath.Join(sandboxDir, persistFile) + f, err := os.OpenFile(sandboxFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, fileMode) + if err != nil { + return err + } + defer f.Close() + + if err := json.NewEncoder(f).Encode(fs.sandboxState); err != nil { + return err + } + + // persist container configuration data + for cid, cstate := range fs.containerState { + cdir := filepath.Join(sandboxDir, cid) + if err := os.MkdirAll(cdir, dirMode); err != nil { + return err + } + + cfile := filepath.Join(cdir, persistFile) + cf, err := os.OpenFile(cfile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, fileMode) + if err != nil { + return err + } + + if err := json.NewEncoder(cf).Encode(cstate); err != nil { + return err + } + cf.Close() + } + + return nil +} + +// FromDisk restores state for sandbox with name sid +func (fs *FS) FromDisk(sid string) error { + if sid == "" { + return fmt.Errorf("restore requires sandbox id") + } + + fs.sandboxState.SandboxContainer = sid + + sandboxDir, err := fs.sandboxDir() + if err != nil { + return err + } + + if err := fs.lock(); err != nil { + return err + } + defer fs.unlock() + + // get sandbox configuration from persist data + sandboxFile := filepath.Join(sandboxDir, persistFile) + f, err := os.OpenFile(sandboxFile, os.O_RDONLY, fileMode) + if err != nil { + return err + } + defer f.Close() + + if err := json.NewDecoder(f).Decode(fs.sandboxState); err != nil { + return err + } + + // walk sandbox dir and find container + files, err := ioutil.ReadDir(sandboxDir) + if err != nil { + return err + } + + for _, file := range files { + if !file.IsDir() { + continue + } + + cid := file.Name() + cfile := filepath.Join(sandboxDir, cid, persistFile) + cf, err := os.OpenFile(cfile, os.O_RDONLY, fileMode) + if err != nil { + // if persist.json doesn't exist, ignore and go to next + if os.IsNotExist(err) { + continue + } + return err + } + + var cstate persistapi.ContainerState + if err := json.NewDecoder(cf).Decode(&cstate); err != nil { + return err + } + cf.Close() + + fs.containerState[cid] = cstate + } + return nil +} + +// Destroy removes everything from disk +func (fs *FS) Destroy() error { + sandboxDir, err := fs.sandboxDir() + if err != nil { + return err + } + + if err := os.RemoveAll(sandboxDir); err != nil { + return err + } + return nil +} + +// GetStates returns SandboxState and ContainerState +func (fs *FS) GetStates() (*persistapi.SandboxState, map[string]persistapi.ContainerState, error) { + return fs.sandboxState, fs.containerState, nil +} + +// AddSaveCallback registers processing hooks for Dump +func (fs *FS) AddSaveCallback(name string, f persistapi.SetFunc) { + // only accept last registered hook with same name + fs.setFuncs[name] = f +} + +func (fs *FS) lock() error { + sandboxDir, err := fs.sandboxDir() + if err != nil { + return err + } + + f, err := os.Open(sandboxDir) + if err != nil { + return err + } + + if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { + f.Close() + return err + } + fs.lockFile = f + + return nil +} + +func (fs *FS) unlock() error { + if fs.lockFile == nil { + return nil + } + + lockFile := fs.lockFile + defer lockFile.Close() + fs.lockFile = nil + if err := syscall.Flock(int(lockFile.Fd()), syscall.LOCK_UN); err != nil { + return err + } + + return nil +} + +// TestSetRunStoragePath set runStoragePath to path +// this function is only used for testing purpose +func TestSetRunStoragePath(path string) { + runStoragePath = path +} diff --git a/virtcontainers/persist/fs/fs_test.go b/virtcontainers/persist/fs/fs_test.go new file mode 100644 index 0000000000..e9ad01b340 --- /dev/null +++ b/virtcontainers/persist/fs/fs_test.go @@ -0,0 +1,106 @@ +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package fs + +import ( + "fmt" + "os" + "testing" + + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" + "github.com/stretchr/testify/assert" +) + +func getFsDriver() (*FS, error) { + driver, err := Init() + if err != nil { + return nil, fmt.Errorf("failed to init fs driver") + } + fs, ok := driver.(*FS) + if !ok { + return nil, fmt.Errorf("failed to convert driver to *FS") + } + + return fs, nil +} + +func TestFsLock(t *testing.T) { + fs, err := getFsDriver() + assert.Nil(t, err) + assert.NotNil(t, fs) + + fs.sandboxState.SandboxContainer = "test-fs-driver" + sandboxDir, err := fs.sandboxDir() + assert.Nil(t, err) + + err = os.MkdirAll(sandboxDir, dirMode) + assert.Nil(t, err) + + assert.Nil(t, fs.lock()) + assert.NotNil(t, fs.lock()) + + assert.Nil(t, fs.unlock()) + assert.Nil(t, fs.unlock()) +} + +func TestFsDriver(t *testing.T) { + fs, err := getFsDriver() + assert.Nil(t, err) + assert.NotNil(t, fs) + + fs.AddSaveCallback("test", func(ss *persistapi.SandboxState, cs map[string]persistapi.ContainerState) error { + return nil + }) + // missing sandbox container id + assert.NotNil(t, fs.ToDisk()) + + id := "test-fs-driver" + // missing sandbox container id + fs.AddSaveCallback("test", func(ss *persistapi.SandboxState, cs map[string]persistapi.ContainerState) error { + ss.SandboxContainer = id + return nil + }) + assert.Nil(t, fs.ToDisk()) + + fs.AddSaveCallback("test1", func(ss *persistapi.SandboxState, cs map[string]persistapi.ContainerState) error { + ss.State = "running" + return nil + }) + + // try non-existent dir + assert.NotNil(t, fs.FromDisk("test-fs")) + + // since we didn't call ToDisk, Callbacks are not invoked, and state is still empty in disk file + assert.Nil(t, fs.FromDisk(id)) + ss, cs, err := fs.GetStates() + assert.Nil(t, err) + assert.NotNil(t, ss) + assert.Equal(t, len(cs), 0) + + assert.Equal(t, ss.SandboxContainer, id) + assert.Equal(t, ss.State, "") + + // flush all to disk + assert.Nil(t, fs.ToDisk()) + assert.Nil(t, fs.FromDisk(id)) + ss, cs, err = fs.GetStates() + assert.Nil(t, err) + assert.NotNil(t, ss) + assert.Equal(t, len(cs), 0) + + assert.Equal(t, ss.SandboxContainer, id) + assert.Equal(t, ss.State, "running") + + assert.Nil(t, fs.Destroy()) + + dir, err := fs.sandboxDir() + assert.Nil(t, err) + assert.NotEqual(t, len(dir), 0) + + _, err = os.Stat(dir) + assert.NotNil(t, err) + assert.True(t, os.IsNotExist(err)) +} diff --git a/virtcontainers/persist/manager.go b/virtcontainers/persist/manager.go new file mode 100644 index 0000000000..7e46b36bce --- /dev/null +++ b/virtcontainers/persist/manager.go @@ -0,0 +1,47 @@ +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persist + +import ( + "fmt" + + exp "github.com/kata-containers/runtime/virtcontainers/experimental" + "github.com/kata-containers/runtime/virtcontainers/persist/api" + "github.com/kata-containers/runtime/virtcontainers/persist/fs" +) + +type initFunc (func() (persistapi.PersistDriver, error)) + +var ( + // NewStoreFeature is an experimental feature + NewStoreFeature = exp.Feature{ + Name: "newstore", + Description: "This is a new storage driver which reorganized disk data structures, it has to be an experimental feature since it breaks backward compatibility.", + ExpRelease: "2.0", + } + expErr error + supportedDrivers = map[string]initFunc{ + + "fs": fs.Init, + } +) + +func init() { + expErr = exp.Register(NewStoreFeature) +} + +// GetDriver returns new PersistDriver according to driver name +func GetDriver(name string) (persistapi.PersistDriver, error) { + if expErr != nil { + return nil, expErr + } + + if f, ok := supportedDrivers[name]; ok { + return f() + } + + return nil, fmt.Errorf("failed to get storage driver %q", name) +} diff --git a/virtcontainers/persist/manager_test.go b/virtcontainers/persist/manager_test.go new file mode 100644 index 0000000000..f0d5b0383e --- /dev/null +++ b/virtcontainers/persist/manager_test.go @@ -0,0 +1,22 @@ +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package persist + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetDriver(t *testing.T) { + nonexist, err := GetDriver("non-exist") + assert.NotNil(t, err) + assert.Nil(t, nonexist) + + fsDriver, err := GetDriver("fs") + assert.Nil(t, err) + assert.NotNil(t, fsDriver) +} diff --git a/virtcontainers/persist/plugin/README.md b/virtcontainers/persist/plugin/README.md new file mode 100644 index 0000000000..ec230d15b8 --- /dev/null +++ b/virtcontainers/persist/plugin/README.md @@ -0,0 +1,2 @@ +This package is a simple placeholder currently which will contain persist storage plugin support, +e.g. leveldb, sqlite and other possible storage implementations. diff --git a/virtcontainers/persist_test.go b/virtcontainers/persist_test.go new file mode 100644 index 0000000000..425b33d6c6 --- /dev/null +++ b/virtcontainers/persist_test.go @@ -0,0 +1,133 @@ +// Copyright (c) 2019 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "context" + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" + + exp "github.com/kata-containers/runtime/virtcontainers/experimental" + "github.com/kata-containers/runtime/virtcontainers/persist" + "github.com/kata-containers/runtime/virtcontainers/types" +) + +func testCreateExpSandbox() (*Sandbox, error) { + sconfig := SandboxConfig{ + ID: "test-exp", + HypervisorType: MockHypervisor, + HypervisorConfig: newHypervisorConfig(nil, nil), + AgentType: NoopAgentType, + NetworkConfig: NetworkConfig{}, + Volumes: nil, + Containers: nil, + Experimental: []exp.Feature{persist.NewStoreFeature}, + } + + // support experimental + sandbox, err := createSandbox(context.Background(), sconfig, nil) + if err != nil { + return nil, fmt.Errorf("Could not create sandbox: %s", err) + } + + if err := sandbox.agent.startSandbox(sandbox); err != nil { + return nil, err + } + + return sandbox, nil +} + +func TestSupportNewStore(t *testing.T) { + hConfig := newHypervisorConfig(nil, nil) + sandbox, err := testCreateSandbox(t, testSandboxID, MockHypervisor, hConfig, NoopAgentType, NetworkConfig{}, nil, nil) + if err != nil { + t.Fatal(err) + } + defer cleanUp() + + // not support experimental + assert.False(t, sandbox.supportNewStore()) + + // support experimental + sandbox, err = testCreateExpSandbox() + if err != nil { + t.Fatal(err) + } + assert.True(t, sandbox.supportNewStore()) +} + +func TestSandboxRestore(t *testing.T) { + var err error + sconfig := SandboxConfig{ + ID: "test-exp", + Experimental: []exp.Feature{persist.NewStoreFeature}, + } + container := make(map[string]*Container) + container["test-exp"] = &Container{} + + sandbox := Sandbox{ + id: "test-exp", + containers: container, + hypervisor: &mockHypervisor{}, + ctx: context.Background(), + config: &sconfig, + } + + if sandbox.newStore, err = persist.GetDriver("fs"); err != nil || sandbox.newStore == nil { + t.Fatalf("failed to get fs persist driver") + } + + // if we don't call ToDisk, we can get nothing from disk + err = sandbox.Restore() + assert.NotNil(t, err) + assert.True(t, os.IsNotExist(err)) + + // disk data are empty + err = sandbox.newStore.ToDisk() + assert.Nil(t, err) + + err = sandbox.Restore() + assert.Nil(t, err) + assert.Equal(t, sandbox.state.State, types.StateString("")) + assert.Equal(t, sandbox.state.GuestMemoryBlockSizeMB, uint32(0)) + assert.Equal(t, sandbox.state.BlockIndex, 0) + + // register callback function + sandbox.stateSaveCallback() + + sandbox.state.State = types.StateString("running") + sandbox.state.GuestMemoryBlockSizeMB = uint32(1024) + sandbox.state.BlockIndex = 2 + // flush data to disk + err = sandbox.newStore.ToDisk() + assert.Nil(t, err) + + // empty the sandbox + sandbox.state = types.SandboxState{} + + // restore data from disk + err = sandbox.Restore() + assert.Nil(t, err) + assert.Equal(t, sandbox.state.State, types.StateString("running")) + assert.Equal(t, sandbox.state.GuestMemoryBlockSizeMB, uint32(1024)) + // require hvStateSaveCallback to make it savable + assert.Equal(t, sandbox.state.BlockIndex, 0) + + // after use hvStateSaveCallbck, BlockIndex can be saved now + sandbox.state.BlockIndex = 2 + sandbox.hvStateSaveCallback() + err = sandbox.newStore.ToDisk() + assert.Nil(t, err) + err = sandbox.Restore() + assert.Nil(t, err) + assert.Equal(t, sandbox.state.State, types.StateString("running")) + assert.Equal(t, sandbox.state.GuestMemoryBlockSizeMB, uint32(1024)) + assert.Equal(t, sandbox.state.BlockIndex, 2) + +} diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index 1faf9f4441..be161b6b4a 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -27,6 +27,8 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/drivers" deviceManager "github.com/kata-containers/runtime/virtcontainers/device/manager" exp "github.com/kata-containers/runtime/virtcontainers/experimental" + "github.com/kata-containers/runtime/virtcontainers/persist" + persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types" "github.com/kata-containers/runtime/virtcontainers/store" @@ -159,8 +161,11 @@ type Sandbox struct { hypervisor hypervisor agent agent store *store.VCStore - network Network - monitor *monitor + // store is used to replace VCStore step by step + newStore persistapi.PersistDriver + + network Network + monitor *monitor config *SandboxConfig @@ -430,8 +435,10 @@ func (s *Sandbox) getAndStoreGuestDetails() error { } s.state.GuestMemoryHotplugProbe = guestDetailRes.SupportMemHotplugProbe - if err = s.store.Store(store.State, s.state); err != nil { - return err + if !s.supportNewStore() { + if err = s.store.Store(store.State, s.state); err != nil { + return err + } } } @@ -472,14 +479,29 @@ func createSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Fac } s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, devices) - // We first try to fetch the sandbox state from storage. - // If it exists, this means this is a re-creation, i.e. - // we don't need to talk to the guest's agent, but only - // want to create the sandbox and its containers in memory. - state, err := s.store.LoadState() - if err == nil && state.State != "" { - s.state = state - return s, nil + if s.supportNewStore() { + // register persist hook for now, data will be written to disk by ToDisk() + s.stateSaveCallback() + s.hvStateSaveCallback() + s.devicesSaveCallback() + + if err := s.Restore(); err == nil && s.state.State != "" { + return s, nil + } + + // if sandbox doesn't exist, set persist version to current version + // otherwise do nothing + s.verSaveCallback() + } else { + // We first try to fetch the sandbox state from storage. + // If it exists, this means this is a re-creation, i.e. + // we don't need to talk to the guest's agent, but only + // want to create the sandbox and its containers in memory. + state, err := s.store.LoadState() + if err == nil && state.State != "" { + s.state = state + return s, nil + } } // Below code path is called only during create, because of earlier check. @@ -536,6 +558,10 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor s.store = vcStore + if s.newStore, err = persist.GetDriver("fs"); err != nil || s.newStore == nil { + return nil, fmt.Errorf("failed to get fs persist driver") + } + if err = globalSandboxList.addSandbox(s); err != nil { return nil, err } @@ -586,6 +612,13 @@ func (s *Sandbox) storeSandbox() error { } } + if s.supportNewStore() { + // flush data to storage + if err := s.newStore.ToDisk(); err != nil { + return err + } + } + return nil } @@ -1010,7 +1043,9 @@ func (s *Sandbox) addContainer(c *Container) error { ann := c.GetAnnotations() if ann[annotations.ContainerTypeKey] == string(PodSandbox) { s.state.CgroupPath = c.state.CgroupPath - return s.store.Store(store.State, s.state) + if !s.supportNewStore() { + return s.store.Store(store.State, s.state) + } } return nil @@ -1078,6 +1113,10 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro return nil, err } + if err = s.storeSandbox(); err != nil { + return nil, err + } + return c, nil } @@ -1094,6 +1133,11 @@ func (s *Sandbox) StartContainer(containerID string) (VCContainer, error) { if err != nil { return nil, err } + + if err = s.storeSandbox(); err != nil { + return nil, err + } + //Fixme Container delete from sandbox, need to update resources return c, nil @@ -1112,6 +1156,9 @@ func (s *Sandbox) StopContainer(containerID string) (VCContainer, error) { return nil, err } + if err = s.storeSandbox(); err != nil { + return nil, err + } return c, nil } @@ -1124,7 +1171,14 @@ func (s *Sandbox) KillContainer(containerID string, signal syscall.Signal, all b } // Send a signal to the process. - return c.kill(signal, all) + if err := c.kill(signal, all); err != nil { + return err + } + + if err = s.storeSandbox(); err != nil { + return err + } + return nil } // DeleteContainer deletes a container from the sandbox @@ -1158,6 +1212,9 @@ func (s *Sandbox) DeleteContainer(containerID string) (VCContainer, error) { return nil, err } + if err = s.storeSandbox(); err != nil { + return nil, err + } return c, nil } @@ -1236,7 +1293,13 @@ func (s *Sandbox) UpdateContainer(containerID string, resources specs.LinuxResou return err } - return c.storeContainer() + if err := c.storeContainer(); err != nil { + return err + } + if err = s.storeSandbox(); err != nil { + return err + } + return nil } // StatsContainer return the stats of a running container @@ -1263,7 +1326,14 @@ func (s *Sandbox) PauseContainer(containerID string) error { } // Pause the container. - return c.pause() + if err := c.pause(); err != nil { + return err + } + + if err = s.storeSandbox(); err != nil { + return err + } + return nil } // ResumeContainer resumes a paused container. @@ -1275,7 +1345,14 @@ func (s *Sandbox) ResumeContainer(containerID string) error { } // Resume the container. - return c.resume() + if err := c.resume(); err != nil { + return err + } + + if err = s.storeSandbox(); err != nil { + return err + } + return nil } // createContainers registers all containers to the proxy, create the @@ -1306,6 +1383,9 @@ func (s *Sandbox) createContainers() error { if err := s.updateCgroups(); err != nil { return err } + if err := s.storeSandbox(); err != nil { + return err + } return nil } @@ -1327,6 +1407,10 @@ func (s *Sandbox) Start() error { } } + if err := s.storeSandbox(); err != nil { + return err + } + s.Logger().Info("Sandbox is started") return nil @@ -1362,7 +1446,15 @@ func (s *Sandbox) Stop() error { } // Remove the network. - return s.removeNetwork() + if err := s.removeNetwork(); err != nil { + return err + } + + if err := s.storeSandbox(); err != nil { + return err + } + + return nil } // Pause pauses the sandbox @@ -1379,7 +1471,15 @@ func (s *Sandbox) Pause() error { s.monitor.stop() } - return s.pauseSetStates() + if err := s.pauseSetStates(); err != nil { + return err + } + + if err := s.storeSandbox(); err != nil { + return err + } + + return nil } // Resume resumes the sandbox @@ -1388,7 +1488,15 @@ func (s *Sandbox) Resume() error { return err } - return s.resumeSetStates() + if err := s.resumeSetStates(); err != nil { + return err + } + + if err := s.storeSandbox(); err != nil { + return err + } + + return nil } // list lists all sandbox running on the host. @@ -1412,7 +1520,10 @@ func (s *Sandbox) setSandboxState(state types.StateString) error { s.state.State = state // update on-disk state - return s.store.Store(store.State, s.state) + if !s.supportNewStore() { + return s.store.Store(store.State, s.state) + } + return nil } func (s *Sandbox) pauseSetStates() error { @@ -1444,9 +1555,12 @@ func (s *Sandbox) getAndSetSandboxBlockIndex() (int, error) { // Increment so that container gets incremented block index s.state.BlockIndex++ - // update on-disk state - if err := s.store.Store(store.State, s.state); err != nil { - return -1, err + if !s.supportNewStore() { + // experimental runtime use "persist.json" which doesn't need "state.json" anymore + // update on-disk state + if err := s.store.Store(store.State, s.state); err != nil { + return -1, err + } } return currentIndex, nil @@ -1457,9 +1571,12 @@ func (s *Sandbox) getAndSetSandboxBlockIndex() (int, error) { func (s *Sandbox) decrementSandboxBlockIndex() error { s.state.BlockIndex-- - // update on-disk state - if err := s.store.Store(store.State, s.state); err != nil { - return err + if !s.supportNewStore() { + // experimental runtime use "persist.json" which doesn't need "state.json" anymore + // update on-disk state + if err := s.store.Store(store.State, s.state); err != nil { + return err + } } return nil diff --git a/virtcontainers/sandbox_test.go b/virtcontainers/sandbox_test.go index 70ed5906b9..2188972d10 100644 --- a/virtcontainers/sandbox_test.go +++ b/virtcontainers/sandbox_test.go @@ -762,15 +762,11 @@ func TestContainerStateSetFstype(t *testing.T) { hConfig := newHypervisorConfig(nil, nil) sandbox, err := testCreateSandbox(t, testSandboxID, MockHypervisor, hConfig, NoopAgentType, NetworkConfig{}, containers, nil) - if err != nil { - t.Fatal(err) - } + assert.Nil(t, err) defer cleanUp() vcStore, err := store.NewVCSandboxStore(sandbox.ctx, sandbox.id) - if err != nil { - t.Fatal(err) - } + assert.Nil(t, err) sandbox.store = vcStore c := sandbox.GetContainer("100") @@ -827,6 +823,11 @@ func TestContainerStateSetFstype(t *testing.T) { t.Fatal() } + // experimental features doesn't write state.json + if sandbox.supportNewStore() { + return + } + var res types.ContainerState err = json.Unmarshal([]byte(string(fileData)), &res) if err != nil { diff --git a/virtcontainers/virtcontainers_test.go b/virtcontainers/virtcontainers_test.go index 4faf72f647..424a83ed6d 100644 --- a/virtcontainers/virtcontainers_test.go +++ b/virtcontainers/virtcontainers_test.go @@ -14,6 +14,7 @@ import ( "path/filepath" "testing" + "github.com/kata-containers/runtime/virtcontainers/persist/fs" "github.com/kata-containers/runtime/virtcontainers/store" "github.com/sirupsen/logrus" ) @@ -104,6 +105,7 @@ func TestMain(m *testing.M) { // allow the tests to run without affecting the host system. store.ConfigStoragePath = filepath.Join(testDir, store.StoragePathSuffix, "config") store.RunStoragePath = filepath.Join(testDir, store.StoragePathSuffix, "run") + fs.TestSetRunStoragePath(filepath.Join(testDir, "vc", "sbs")) // set now that configStoragePath has been overridden. sandboxDirConfig = filepath.Join(store.ConfigStoragePath, testSandboxID)