diff --git a/pkg/blockdev/blockdev.go b/pkg/blockdev/blockdev.go new file mode 100644 index 000000000..732905c4f --- /dev/null +++ b/pkg/blockdev/blockdev.go @@ -0,0 +1,245 @@ +/* +Copyright 2018 Mirantis + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package blockdev + +import ( + "crypto/sha256" + "encoding/binary" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/Mirantis/virtlet/pkg/utils" + "github.com/golang/glog" +) + +const ( + // VirtletLogicalDevicePrefix denotes the required prefix for + // the virtual block devices created by Virtlet. + VirtletLogicalDevicePrefix = "virtlet-dm-" + virtletRootfsMagic = 0x263dbe52ba576702 + virtletRootfsMetadataVersion = 1 + sectorSize = 512 + devnameUeventVar = "DEVNAME=" +) + +type virtletRootfsHeader struct { + Magic uint64 + MetadataVersion uint16 + ImageHash [sha256.Size]byte +} + +// LogicalDeviceHandler makes it possible to store metadata in the +// first sector of a block device, making the rest of the device +// available as another logical device managed by the device mapper. +type LogicalDeviceHandler struct { + commander utils.Commander + devPath string + sysfsPath string +} + +// NewLogicalDeviceHandler creates a new LogicalDeviceHandler using +// the specified commander and paths that should be used in place of +// /dev and /sys directories (empty string to use /dev and /sys, +// respectively) +func NewLogicalDeviceHandler(commander utils.Commander, devPath, sysfsPath string) *LogicalDeviceHandler { + if devPath == "" { + devPath = "/dev" + } + if sysfsPath == "" { + sysfsPath = "/sys" + } + return &LogicalDeviceHandler{commander, devPath, sysfsPath} +} + +// EnsureDevHeaderMatches returns true if the specified block device +// has proper Virtlet header that matches the specified image hash +func (ldh *LogicalDeviceHandler) EnsureDevHeaderMatches(devPath string, imageHash [sha256.Size]byte) (bool, error) { + f, err := os.OpenFile(devPath, os.O_RDWR|os.O_SYNC, 0) + if err != nil { + return false, fmt.Errorf("open %q: %v", devPath, err) + } + defer func() { + if f != nil { + f.Close() + } + }() + + var hdr virtletRootfsHeader + if err := binary.Read(f, binary.BigEndian, &hdr); err != nil { + return false, fmt.Errorf("reading rootfs header: %v", err) + } + + headerMatch := true + switch { + case hdr.Magic != virtletRootfsMagic || hdr.ImageHash != imageHash: + headerMatch = false + if _, err := f.Seek(0, os.SEEK_SET); err != nil { + return false, fmt.Errorf("seek: %v", err) + } + if err := binary.Write(f, binary.BigEndian, virtletRootfsHeader{ + Magic: virtletRootfsMagic, + MetadataVersion: virtletRootfsMetadataVersion, + ImageHash: imageHash, + }); err != nil { + return false, fmt.Errorf("writing rootfs header: %v", err) + } + case hdr.MetadataVersion != virtletRootfsMetadataVersion: + // NOTE: we should handle earlier metadata versions + // after we introduce new ones. But we can't handle + // future metadata versions and any non-matching + // metadata versions are future ones currently, so we + // don't want to lose any data here. + return false, fmt.Errorf("unsupported virtlet root device metadata version %v", hdr.MetadataVersion) + } + + if err := f.Close(); err != nil { + return false, fmt.Errorf("error closing rootfs device: %v", err) + } + f = nil + return headerMatch, nil +} + +// blockDevSizeInSectors returns the size of the block device in sectors +func (ldh *LogicalDeviceHandler) blockDevSizeInSectors(devPath string) (uint64, error) { + // NOTE: this is also doable via ioctl but this way it's + // shorter (no need for fake non-linux impl, extra interface, + // extra fake impl for it). Some links that may help if we + // decide to use the ioctl later on: + // https://github.com/karelzak/util-linux/blob/master/disk-utils/blockdev.c + // https://github.com/aicodix/smr/blob/24aa589f378827a69a07d220f114c169693dacec/smr.go#L29 + out, err := ldh.commander.Command("blockdev", "--getsz", devPath).Run(nil) + if err != nil { + return 0, err + } + nSectors, err := strconv.ParseUint(strings.TrimSpace(string(out)), 10, 64) + if err != nil { + return 0, fmt.Errorf("bad size value returned by blockdev: %q: %v", out, err) + } + return nSectors, nil +} + +// Map maps the device sectors starting from 1 to a new virtual block +// device. dmName specifies the name of the new device. +func (ldh *LogicalDeviceHandler) Map(devPath, dmName string, imageSize uint64) error { + if !strings.HasPrefix(dmName, VirtletLogicalDevicePrefix) { + return fmt.Errorf("bad logical device name %q: must have prefix %q", dmName, VirtletLogicalDevicePrefix) + } + + nSectors, err := ldh.blockDevSizeInSectors(devPath) + if err != nil { + return err + } + + // sector 0 is reserved for the Virtlet metadata + minSectors := (imageSize+sectorSize-1)/sectorSize + 1 + if nSectors < minSectors { + return fmt.Errorf("block device too small for the image: need at least %d bytes (%d sectors) but got %d bytes (%d sectors)", + minSectors*sectorSize, + minSectors, + nSectors*sectorSize, + nSectors) + } + + hostPath, err := filepath.EvalSymlinks(devPath) + if err != nil { + return err + } + + dmTable := fmt.Sprintf("0 %d linear %s 1\n", nSectors-1, hostPath) + _, err = ldh.commander.Command("dmsetup", "create", dmName).Run([]byte(dmTable)) + return err +} + +// Unmap unmaps the virtual block device +func (ldh *LogicalDeviceHandler) Unmap(dmName string) error { + _, err := ldh.commander.Command("dmsetup", "remove", dmName).Run(nil) + return err +} + +// ListVirtletLogicalDevices returns a list of logical devices managed +// by Virtlet +func (ldh *LogicalDeviceHandler) ListVirtletLogicalDevices() ([]string, error) { + table, err := ldh.commander.Command("dmsetup", "table").Run(nil) + if err != nil { + return nil, fmt.Errorf("dmsetup table: %v", err) + } + var r []string + for _, l := range strings.Split(string(table), "\n") { + if l == "" { + continue + } + fields := strings.Fields(l) + if len(fields) != 6 || fields[3] != "linear" { + continue + } + virtDevName := fields[0] + if strings.HasSuffix(virtDevName, ":") { + virtDevName = virtDevName[:len(virtDevName)-1] + } + + devID := fields[4] + ueventFile := filepath.Join(ldh.sysfsPath, "dev/block", devID, "uevent") + ueventContent, err := ioutil.ReadFile(ueventFile) + if err != nil { + glog.Warningf("error reading %q: %v", ueventFile, err) + continue + } + devName := "" + for _, ul := range strings.Split(string(ueventContent), "\n") { + ul = strings.TrimSpace(ul) + if strings.HasPrefix(ul, devnameUeventVar) { + devName = ul[len(devnameUeventVar):] + break + } + } + if devName == "" { + glog.Warningf("bad uevent file %q: no DEVNAME", ueventFile) + continue + } + + isVbd, err := ldh.deviceHasVirtletHeader(devName) + if err != nil { + glog.Warningf("checking device file %q: %v", devName, err) + continue + } + + if isVbd { + r = append(r, virtDevName) + } + } + + return r, nil +} + +func (ldh *LogicalDeviceHandler) deviceHasVirtletHeader(devName string) (bool, error) { + f, err := os.Open(filepath.Join(ldh.devPath, devName)) + if err != nil { + return false, err + } + defer f.Close() + + var hdr virtletRootfsHeader + if err := binary.Read(f, binary.BigEndian, &hdr); err != nil { + return false, err + } + + return hdr.Magic == virtletRootfsMagic, nil +} diff --git a/pkg/blockdev/blockdev_test.go b/pkg/blockdev/blockdev_test.go new file mode 100644 index 000000000..379c40f9f --- /dev/null +++ b/pkg/blockdev/blockdev_test.go @@ -0,0 +1,186 @@ +/* +Copyright 2018 Mirantis + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package blockdev + +import ( + "crypto/sha256" + "os" + "path/filepath" + "reflect" + "testing" + + fake "github.com/Mirantis/virtlet/pkg/blockdev/fake" + "github.com/Mirantis/virtlet/pkg/utils" + fakeutils "github.com/Mirantis/virtlet/pkg/utils/fake" + testutils "github.com/Mirantis/virtlet/pkg/utils/testing" +) + +func TestDevHeader(t *testing.T) { + for _, tc := range []struct { + name string + content []string + dmPath string + fileSize uint64 + imageWrittenAgain bool + errors [2]string + }{ + { + name: "image unchanged", + content: []string{"image1", "image1"}, + fileSize: 8704, // just added a sector + }, + { + name: "image change", + content: []string{"image1", "image2"}, + fileSize: 16384, + imageWrittenAgain: true, + }, + { + name: "first image too big", + content: []string{"image1"}, + fileSize: 4096, + errors: [2]string{ + "too small", + "", + }, + }, + { + name: "second image too big", + content: []string{"image1", "image2"}, + fileSize: 8704, + errors: [2]string{ + "", + "too small", + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + fake.WithFakeRootDev(t, tc.fileSize, func(devPath, devDir string) { + for n, content := range tc.content { + if content == "" { + continue + } + cmd := fakeutils.NewCommander(nil, nil) + + ldh := NewLogicalDeviceHandler(cmd, "", "") + headerExpectedToMatch := n > 0 && tc.content[n-1] == content + imageHash := sha256.Sum256([]byte(content)) + headerMatches, err := ldh.EnsureDevHeaderMatches(devPath, imageHash) + if err != nil { + t.Fatalf("EnsureDevHeaderMatches: %v", err) + } + + switch { + case headerMatches == headerExpectedToMatch: + // ok + case headerMatches: + t.Errorf("[%d] the header is expected to match but didn't", n) + case !headerMatches: + t.Errorf("[%d] the header is not expected to match but did", n) + } + } + }) + }) + } +} + +func TestCreateRemoveVirtualBlockDevice(t *testing.T) { + fake.WithFakeRootDev(t, 0, func(devPath, devDir string) { + rec := testutils.NewToplevelRecorder() + cmd := fakeutils.NewCommander(rec, []fakeutils.CmdSpec{ + { + Match: "blockdev --getsz", + Stdout: "4", + }, + { + Match: "dmsetup create", + }, + { + Match: "dmsetup remove", + }, + }) + cmd.ReplaceTempPath("__dev__", "/dev") + symlinkPath := filepath.Join(devDir, "rootdevlink") + if err := os.Symlink(devPath, symlinkPath); err != nil { + t.Fatalf("Symlink(): %v", err) + } + + ldh := NewLogicalDeviceHandler(cmd, "", "") + if err := ldh.Map(symlinkPath, "virtlet-dm-foobar", 1024); err != nil { + t.Fatalf("Map(): %v", err) + } + if err := ldh.Unmap("virtlet-dm-foobar"); err != nil { + t.Fatalf("Unmap(): %v", err) + } + + expectedRecs := []*testutils.Record{ + { + Name: "CMD", + Value: map[string]string{ + "cmd": "blockdev --getsz /dev/rootdevlink", + "stdout": "4", + }, + }, + { + Name: "CMD", + Value: map[string]string{ + "cmd": "dmsetup create virtlet-dm-foobar", + "stdin": "0 3 linear /dev/rootdev 1\n", + }, + }, + { + Name: "CMD", + Value: map[string]string{ + "cmd": "dmsetup remove virtlet-dm-foobar", + }, + }, + } + if !reflect.DeepEqual(expectedRecs, rec.Content()) { + t.Errorf("bad commands recorded:\n%s\ninstead of\n%s", utils.ToJSON(rec.Content()), utils.ToJSON(expectedRecs)) + } + }) +} + +func TestIsVirtletBlockDevice(t *testing.T) { + fake.WithFakeRootDevsAndSysfs(t, func(devPaths []string, table, devDir, sysfsDir string) { + cmd := fakeutils.NewCommander(nil, []fakeutils.CmdSpec{ + { + Match: "^dmsetup table$", + Stdout: table, + }, + }) + ldh := NewLogicalDeviceHandler(cmd, devDir, sysfsDir) + for _, devPath := range devPaths { + if _, err := ldh.EnsureDevHeaderMatches(devPath, sha256.Sum256([]byte("foobar"))); err != nil { + t.Fatalf("EnsureDevHeaderMatches(): %v", err) + } + } + + devs, err := ldh.ListVirtletLogicalDevices() + if err != nil { + t.Fatalf("ListVirtletLogicalDevices(): %v", err) + } + + expectedDevs := []string{ + "virtlet-dm-5edfe2ad-9852-439b-bbfb-3fe8b7c72906", + "virtlet-dm-9a322047-1f0d-4395-8e43-6e1b310ce6f3", + } + if !reflect.DeepEqual(devs, expectedDevs) { + t.Errorf("bad Virtlet block device list: %s instead of %s", utils.ToJSONUnindented(devs), utils.ToJSONUnindented(expectedDevs)) + } + }) +} diff --git a/pkg/blockdev/fake/blockdev.go b/pkg/blockdev/fake/blockdev.go new file mode 100644 index 000000000..f13f5d37b --- /dev/null +++ b/pkg/blockdev/fake/blockdev.go @@ -0,0 +1,121 @@ +/* +Copyright 2018 Mirantis + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package fake + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +const dmsetupTable = `vg1-home_rimage_1: 0 209715200 linear 8:1 10240 +vg1-home_rimage_0: 0 209715200 linear 8:17 29304832 +vg1-home: 0 209715200 raid raid1 3 0 region_size 1024 2 253:1 253:2 253:3 253:4 +virtlet-dm-5edfe2ad-9852-439b-bbfb-3fe8b7c72906: 0 8191999 linear 252:0 1 +virtlet-dm-92ed2bdc-4b47-43ce-b0ba-ff1c06a2652d: 0 8191999 linear 252:1 1 +virtlet-dm-9a322047-1f0d-4395-8e43-6e1b310ce6f3: 0 8191999 linear 252:2 1 +vg1-home_rmeta_1: 0 8192 linear 8:1 2048 +vg1-home_rmeta_0: 0 8192 linear 8:17 29296640 +vg1-swap: 0 29294592 linear 8:1 908404736 +vg1-root: 0 29294592 linear 8:17 2048 +vg1-var: 0 1397358592 striped 2 128 8:1 209725440 8:17 239020032 +` + +var ueventFiles = map[string]string{ + "252:0": `MAJOR=252 +MINOR=0 +DEVNAME=rootdev +DEVTYPE=disk +`, + "252:1": `MAJOR=252 +MINOR=0 +DEVNAME=nonrootdev +DEVTYPE=disk +`, + "252:2": `MAJOR=252 +MINOR=0 +DEVNAME=rootdev1 +DEVTYPE=disk +`, + "8:1": `MAJOR=8 +MINOR=1 +DEVNAME=swapdev +DEVTYPE=disk +`, +} + +// WithFakeRootDevs calls the specified function passing it the paths to +// the fake block devices and their containing directory. +func WithFakeRootDevs(t *testing.T, size uint64, names []string, toCall func(devPaths []string, devDir string)) { + tmpDir, err := ioutil.TempDir("", "fake-blockdev") + if err != nil { + t.Fatalf("TempDir(): %v", err) + } + defer os.RemoveAll(tmpDir) + fakeDevDir := filepath.Join(tmpDir, "__dev__") + if err := os.Mkdir(fakeDevDir, 0777); err != nil { + t.Fatalf("Mkdir(): %v", err) + } + + var devPaths []string + for _, name := range names { + devPath := filepath.Join(fakeDevDir, name) + if err := ioutil.WriteFile(devPath, make([]byte, size), 0666); err != nil { + t.Fatalf("WriteFile(): %v", err) + } + devPaths = append(devPaths, devPath) + + } + toCall(devPaths, fakeDevDir) +} + +// WithFakeRootDev calls the specified function passing it the path to +// the fake block device and its containing directory. +func WithFakeRootDev(t *testing.T, size uint64, toCall func(devPath, devDir string)) { + WithFakeRootDevs(t, size, []string{"rootdev"}, func(devPaths []string, devDir string) { + toCall(devPaths[0], devDir) + }) +} + +// WithFakeRootDevsAndSysfs calls the specified function passing it +// the paths to the fake block devices and their containing directory, +// as well as the path to fake sysfs containing uevent entries for the +// fake devices. +func WithFakeRootDevsAndSysfs(t *testing.T, toCall func(devPaths []string, table, devDir, sysfsDir string)) { + WithFakeRootDevs(t, 2048, []string{"rootdev", "rootdev1"}, func(devPaths []string, devDir string) { + // write dummy headerless file as swapdev and nonrootdev + for _, devName := range []string{"swapdev", "nonrootdev"} { + if err := ioutil.WriteFile(filepath.Join(devDir, devName), make([]byte, 1024), 0666); err != nil { + t.Fatalf("WriteFile(): %v", err) + } + } + + sysfsDir := filepath.Join(devDir, "sys") + for id, content := range ueventFiles { + devInfoDir := filepath.Join(sysfsDir, "dev/block", id) + if err := os.MkdirAll(devInfoDir, 0777); err != nil { + t.Fatalf("MkdirAll(): %v", err) + } + if err := ioutil.WriteFile(filepath.Join(devInfoDir, "uevent"), []byte(content), 0666); err != nil { + t.Fatalf("WriteFile(): %v", err) + } + } + + toCall(devPaths, dmsetupTable, devDir, sysfsDir) + }) +} diff --git a/pkg/libvirttools/TestDomainDefinitions__persistent_rootfs.out.yaml b/pkg/libvirttools/TestDomainDefinitions__persistent_rootfs.out.yaml index d16bfc007..d4c9d1bc9 100755 --- a/pkg/libvirttools/TestDomainDefinitions__persistent_rootfs.out.yaml +++ b/pkg/libvirttools/TestDomainDefinitions__persistent_rootfs.out.yaml @@ -6,12 +6,12 @@ stdout: "1000" - name: CMD value: - cmd: dmsetup create virtlet-dm-69eec606-0493-5825-73a4-c5e0c0236155 + cmd: dmsetup create virtlet-dm-231700d5-c9a6-5a49-738d-99a954c51550 stdin: | 0 999 linear /fakedev/69eec606-0493-5825-73a4-c5e0c0236155/volumeDevices/kubernetes.io~local-volume/root 1 - name: CMD value: - cmd: qemu-img convert -O raw /fake/volume/path /dev/mapper/virtlet-dm-69eec606-0493-5825-73a4-c5e0c0236155 + cmd: qemu-img convert -O raw /fake/volume/path /dev/mapper/virtlet-dm-231700d5-c9a6-5a49-738d-99a954c51550 - name: 'domain conn: DefineDomain' value: |- @@ -38,7 +38,7 @@ /vmwrapper - +
@@ -82,4 +82,4 @@ - name: 'domain conn: virtlet-231700d5-c9a6-container1: Undefine' - name: CMD value: - cmd: dmsetup remove virtlet-dm-69eec606-0493-5825-73a4-c5e0c0236155 + cmd: dmsetup remove virtlet-dm-231700d5-c9a6-5a49-738d-99a954c51550 diff --git a/pkg/libvirttools/TestPersistentRootVolume__symlinks.out.yaml b/pkg/libvirttools/TestPersistentRootVolume__symlinks.out.yaml index 054a43f45..7b70d3c90 100755 --- a/pkg/libvirttools/TestPersistentRootVolume__symlinks.out.yaml +++ b/pkg/libvirttools/TestPersistentRootVolume__symlinks.out.yaml @@ -3,7 +3,7 @@ value: persistent/image1 - name: CMD value: - cmd: blockdev --getsz /dev/rootdev + cmd: blockdev --getsz /dev/rootdevlink stdout: "17" - name: CMD value: @@ -29,7 +29,7 @@ value: persistent/image1 - name: CMD value: - cmd: blockdev --getsz /dev/rootdev + cmd: blockdev --getsz /dev/rootdevlink stdout: "17" - name: CMD value: diff --git a/pkg/libvirttools/gc.go b/pkg/libvirttools/gc.go index df5d36a24..30f4fbfbe 100644 --- a/pkg/libvirttools/gc.go +++ b/pkg/libvirttools/gc.go @@ -21,6 +21,8 @@ import ( "os" "path/filepath" "strings" + + "github.com/Mirantis/virtlet/pkg/blockdev" ) const ( @@ -43,6 +45,7 @@ func (v *VirtualizationTool) GarbageCollect() (allErrors []error) { allErrors = append(allErrors, v.removeOrphanRootVolumes(ids)...) allErrors = append(allErrors, v.removeOrphanQcow2Volumes(ids)...) allErrors = append(allErrors, v.removeOrphanConfigImages(ids, configIsoDir)...) + allErrors = append(allErrors, v.removeOrphanVirtualBlockDevices(ids, "", "")...) return } @@ -263,3 +266,33 @@ func (v *VirtualizationTool) removeOrphanConfigImages(ids []string, directory st return allErrors } + +func (v *VirtualizationTool) removeOrphanVirtualBlockDevices(ids []string, devPath, sysfsPath string) []error { + idsInUse := make(map[string]bool) + for _, id := range ids { + idsInUse[id] = true + } + ldh := blockdev.NewLogicalDeviceHandler(v.Commander(), devPath, sysfsPath) + dmNames, err := ldh.ListVirtletLogicalDevices() + if err != nil { + return []error{err} + } + + var allErrors []error + for _, dmName := range dmNames { + if !strings.HasPrefix(dmName, blockdev.VirtletLogicalDevicePrefix) { + panic("bad dmname " + dmName) + } + id := dmName[len(blockdev.VirtletLogicalDevicePrefix):] + if idsInUse[id] { + continue + } + if err := ldh.Unmap(dmName); err != nil { + allErrors = append( + allErrors, + fmt.Errorf("error unmapping %q: %v", dmName, err)) + } + } + + return allErrors +} diff --git a/pkg/libvirttools/gc_test.go b/pkg/libvirttools/gc_test.go index d2a93fc84..ce96b9a79 100644 --- a/pkg/libvirttools/gc_test.go +++ b/pkg/libvirttools/gc_test.go @@ -17,6 +17,7 @@ limitations under the License. package libvirttools import ( + "crypto/sha256" "io/ioutil" "os" "path/filepath" @@ -24,12 +25,15 @@ import ( libvirtxml "github.com/libvirt/libvirt-go-xml" + blockdev "github.com/Mirantis/virtlet/pkg/blockdev" + fakeblockdev "github.com/Mirantis/virtlet/pkg/blockdev/fake" + fakeutils "github.com/Mirantis/virtlet/pkg/utils/fake" testutils "github.com/Mirantis/virtlet/pkg/utils/testing" "github.com/Mirantis/virtlet/tests/gm" ) var ( - randomUUIDs = [...]string{ + testUUIDs = [...]string{ "5edfe2ad-9852-439b-bbfb-3fe8b7c72906", "8a6163c3-e4ee-488f-836a-d2abe92d0744", "13f51f8d-0f4e-4538-9db0-413380ff9c84", @@ -40,12 +44,12 @@ func TestDomainCleanup(t *testing.T) { ct := newContainerTester(t, testutils.NewToplevelRecorder(), nil) defer ct.teardown() - for _, uuid := range randomUUIDs { + for _, uuid := range testUUIDs { if _, err := ct.domainConn.DefineDomain(&libvirtxml.Domain{ Name: "virtlet-" + uuid[:13] + "-container1", UUID: uuid, }); err != nil { - t.Fatalf("Cannot define new fake domain: %v", err) + t.Fatalf("Cannot define the fake domain: %v", err) } } if _, err := ct.domainConn.DefineDomain(&libvirtxml.Domain{ @@ -60,9 +64,8 @@ func TestDomainCleanup(t *testing.T) { } // this should remove all domains (including other than virlet defined) - // with an exception of the last listed in randomUUIDs slice - errors := ct.virtTool.removeOrphanDomains(randomUUIDs[2:]) - if errors != nil { + // with an exception of the last listed in testUUIDs slice + if errors := ct.virtTool.removeOrphanDomains(testUUIDs[2:]); len(errors) != 0 { t.Errorf("removeOrphanDomains returned errors: %v", errors) } @@ -82,7 +85,7 @@ func TestRootVolumesCleanup(t *testing.T) { t.Fatalf("StoragePool(): %v", err) } - for _, uuid := range randomUUIDs { + for _, uuid := range testUUIDs { if _, err := pool.CreateStorageVol(&libvirtxml.StorageVolume{ Name: "root for " + uuid, Target: &libvirtxml.StorageVolumeTarget{Path: "/some/path/virtlet_root_" + uuid}, @@ -102,9 +105,8 @@ func TestRootVolumesCleanup(t *testing.T) { } // this should remove only root volumes corresponding to the two first - // elements of randomUUIDs slice, keeping others - errors := ct.virtTool.removeOrphanRootVolumes(randomUUIDs[2:]) - if errors != nil { + // elements of testUUIDs slice, keeping others + if errors := ct.virtTool.removeOrphanRootVolumes(testUUIDs[2:]); len(errors) != 0 { t.Errorf("removeOrphanRootVolumes returned errors: %v", errors) } @@ -124,7 +126,7 @@ func TestQcow2VolumesCleanup(t *testing.T) { t.Fatalf("StoragePool(): %v", err) } - for _, uuid := range randomUUIDs { + for _, uuid := range testUUIDs { if _, err := pool.CreateStorageVol(&libvirtxml.StorageVolume{ Name: "qcow flexvolume for " + uuid, Target: &libvirtxml.StorageVolumeTarget{Path: "/some/path/virtlet-" + uuid}, @@ -144,9 +146,8 @@ func TestQcow2VolumesCleanup(t *testing.T) { } // this should remove only ephemeral qcow2 volumes corresponding to - // the two first elements of randomUUIDs slice, keeping others - errors := ct.virtTool.removeOrphanQcow2Volumes(randomUUIDs[2:]) - if errors != nil { + // the two first elements of testUUIDs slice, keeping others + if errors := ct.virtTool.removeOrphanQcow2Volumes(testUUIDs[2:]); len(errors) != 0 { t.Errorf("removeOrphanRootVolumes returned errors: %v", errors) } @@ -167,7 +168,7 @@ func TestConfigISOsCleanup(t *testing.T) { } defer os.RemoveAll(directory) - for _, uuid := range randomUUIDs { + for _, uuid := range testUUIDs { fname := filepath.Join(directory, "config-"+uuid+".iso") if file, err := os.Create(fname); err != nil { t.Fatalf("Cannot create fake iso with name %q: %v", fname, err) @@ -191,9 +192,8 @@ func TestConfigISOsCleanup(t *testing.T) { } // this should remove only config iso file corresponding to the first - // element of randomUUIDs slice, keeping other files - errors := ct.virtTool.removeOrphanConfigImages(randomUUIDs[1:], directory) - if errors != nil { + // element of testUUIDs slice, keeping other files + if errors := ct.virtTool.removeOrphanConfigImages(testUUIDs[1:], directory); len(errors) != 0 { t.Errorf("removeOrphanConfigImages returned errors: %v", errors) } @@ -207,7 +207,7 @@ func TestConfigISOsCleanup(t *testing.T) { t.Fatalf("Expected removeOrphanConfigImages to remove single file, but it removed %d files", len(diff)) } - expectedPath := filepath.Join(directory, "config-"+randomUUIDs[0]+".iso") + expectedPath := filepath.Join(directory, "config-"+testUUIDs[0]+".iso") if diff[0] != expectedPath { t.Fatalf("Expected removeOrphanConfigImages to remove only %q file, but it also removed: %q", expectedPath, diff[0]) } @@ -215,6 +215,53 @@ func TestConfigISOsCleanup(t *testing.T) { // no gm validation, because we are testing only file operations in this test } +func TestDeviceMapperCleanup(t *testing.T) { + fakeblockdev.WithFakeRootDevsAndSysfs(t, func(devPaths []string, table, devDir, sysfsDir string) { + dmRemoveCmd := "dmsetup remove virtlet-dm-9a322047-1f0d-4395-8e43-6e1b310ce6f3" + ct := newContainerTester(t, testutils.NewToplevelRecorder(), []fakeutils.CmdSpec{ + { + Match: "^dmsetup table$", + Stdout: table, + }, + { + Match: "^" + dmRemoveCmd + "$", + }, + }) + defer ct.teardown() + + ldh := blockdev.NewLogicalDeviceHandler(ct.virtTool.commander, devDir, sysfsDir) + for _, devPath := range devPaths { + if _, err := ldh.EnsureDevHeaderMatches(devPath, sha256.Sum256([]byte("foobar"))); err != nil { + t.Fatalf("EnsureDevHeaderMatches(): %v", err) + } + } + + for _, uuid := range testUUIDs { + if _, err := ct.domainConn.DefineDomain(&libvirtxml.Domain{ + Name: "virtlet-" + uuid[:13] + "-container1", + UUID: uuid, + }); err != nil { + t.Fatalf("Cannot define the fake domain: %v", err) + } + } + + if errors := ct.virtTool.removeOrphanVirtualBlockDevices(testUUIDs[:], devDir, sysfsDir); len(errors) != 0 { + t.Errorf("removeOrphanDomains returned errors: %v", errors) + } + + n := 0 + for _, r := range ct.rec.Content() { + if r.Name == "CMD" && r.Value.(map[string]string)["cmd"] == dmRemoveCmd { + n++ + } + } + if n != 1 { + t.Errorf("dmsetup remove for the orphaned volume is expected to be called exactly 1 time, but was called %d times", n) + } + }) + // no gm validation b/c we just verify 'dmsetup remove' command above +} + // https://stackoverflow.com/a/45428032 // difference returns the elements in a that aren't in b func difference(a, b []string) []string { diff --git a/pkg/libvirttools/persistentroot_volumesource.go b/pkg/libvirttools/persistentroot_volumesource.go index 214139a2b..f91c64497 100644 --- a/pkg/libvirttools/persistentroot_volumesource.go +++ b/pkg/libvirttools/persistentroot_volumesource.go @@ -18,32 +18,16 @@ package libvirttools import ( "crypto/sha256" - "encoding/binary" "encoding/hex" "fmt" - "os" - "path/filepath" - "strconv" - "strings" libvirtxml "github.com/libvirt/libvirt-go-xml" digest "github.com/opencontainers/go-digest" + "github.com/Mirantis/virtlet/pkg/blockdev" "github.com/Mirantis/virtlet/pkg/metadata/types" ) -const ( - virtletRootfsMagic = 0x263dbe52ba576702 - virtletRootfsMetadataVersion = 1 - sectorSize = 512 -) - -type virtletRootfsHeader struct { - Magic uint64 - MetadataVersion uint16 - ImageHash [sha256.Size]byte -} - // persistentRootVolume represents a root volume that can survive the // deletion of its pod type persistentRootVolume struct { @@ -53,116 +37,22 @@ type persistentRootVolume struct { var _ VMVolume = &persistentRootVolume{} +func (v *persistentRootVolume) devHandler() *blockdev.LogicalDeviceHandler { + return blockdev.NewLogicalDeviceHandler(v.owner.Commander(), "", "") +} + func (v *persistentRootVolume) UUID() string { return v.dev.UUID() } func (v *persistentRootVolume) dmName() string { - return "virtlet-dm-" + v.config.PodSandboxID + return "virtlet-dm-" + v.config.DomainUUID } func (v *persistentRootVolume) dmPath() string { return "/dev/mapper/" + v.dmName() } -func (v *persistentRootVolume) ensureDevHeaderMatches(imageHash [sha256.Size]byte) (bool, error) { - f, err := os.OpenFile(v.dev.HostPath, os.O_RDWR|os.O_SYNC, 0) - if err != nil { - return false, fmt.Errorf("open %q: %v", v.dev.HostPath, err) - } - defer func() { - if f != nil { - f.Close() - } - }() - - var hdr virtletRootfsHeader - if err := binary.Read(f, binary.BigEndian, &hdr); err != nil { - return false, fmt.Errorf("reading rootfs header: %v", err) - } - - headerMatch := true - switch { - case hdr.Magic != virtletRootfsMagic || hdr.ImageHash != imageHash: - headerMatch = false - if _, err := f.Seek(0, os.SEEK_SET); err != nil { - return false, fmt.Errorf("seek: %v", err) - } - if err := binary.Write(f, binary.BigEndian, virtletRootfsHeader{ - Magic: virtletRootfsMagic, - MetadataVersion: virtletRootfsMetadataVersion, - ImageHash: imageHash, - }); err != nil { - return false, fmt.Errorf("writing rootfs header: %v", err) - } - case hdr.MetadataVersion != virtletRootfsMetadataVersion: - // NOTE: we should handle earlier metadata versions - // after we introduce new ones. But we can't handle - // future metadata versions and any non-matching - // metadata versions are future ones currently, so we - // don't want to lose any data here. - return false, fmt.Errorf("unsupported virtlet root device metadata version %v", hdr.MetadataVersion) - } - - if err := f.Close(); err != nil { - return false, fmt.Errorf("error closing rootfs device: %v", err) - } - f = nil - return headerMatch, nil -} - -func (v *persistentRootVolume) blockDevSizeInSectors() (uint64, error) { - // NOTE: this is also doable via ioctl but this way it's - // shorter (no need for fake non-linux impl, extra interface, - // extra fake impl for it). Some links that may help if we - // decide to use the ioctl later on: - // https://github.com/karelzak/util-linux/blob/master/disk-utils/blockdev.c - // https://github.com/aicodix/smr/blob/24aa589f378827a69a07d220f114c169693dacec/smr.go#L29 - out, err := v.owner.Commander().Command("blockdev", "--getsz", v.dev.HostPath).Run(nil) - if err != nil { - return 0, err - } - nSectors, err := strconv.ParseUint(strings.TrimSpace(string(out)), 10, 64) - if err != nil { - return 0, fmt.Errorf("bad size value returned by blockdev: %q: %v", out, err) - } - return nSectors, nil -} - -func (v *persistentRootVolume) dmCmd(cmd []string, stdin string) error { - dmCmd := v.owner.Commander().Command(cmd[0], cmd[1:]...) - var stdinBytes []byte - if stdin != "" { - stdinBytes = []byte(stdin) - } - _, err := dmCmd.Run(stdinBytes) - return err -} - -func (v *persistentRootVolume) dmSetup(imageSize uint64) error { - nSectors, err := v.blockDevSizeInSectors() - if err != nil { - return err - } - // sector 0 is reserved for the Virtlet metadata - minSectors := (imageSize+sectorSize-1)/sectorSize + 1 - if nSectors < minSectors { - return fmt.Errorf("block device too small for the image: need at least %d bytes (%d sectors) but got %d bytes (%d sectors)", - minSectors*sectorSize, - minSectors, - nSectors*sectorSize, - nSectors) - } - hostPath, err := filepath.EvalSymlinks(v.dev.HostPath) - if err != nil { - return err - } - dmTable := fmt.Sprintf("0 %d linear %s 1\n", nSectors-1, hostPath) - dmCmd := v.owner.Commander().Command("dmsetup", "create", v.dmName()) - _, err = dmCmd.Run([]byte(dmTable)) - return err -} - func (v *persistentRootVolume) copyImageToDev(imagePath string) error { _, err := v.owner.Commander().Command("qemu-img", "convert", "-O", "raw", imagePath, v.dmPath()).Run(nil) return err @@ -187,10 +77,11 @@ func (v *persistentRootVolume) Setup() (*libvirtxml.DomainDisk, *libvirtxml.Doma var hash [sha256.Size]byte copy(hash[:], imageHash) - headerMatches, err := v.ensureDevHeaderMatches(hash) + ldh := v.devHandler() + headerMatches, err := ldh.EnsureDevHeaderMatches(v.dev.HostPath, hash) if err == nil { - err = v.dmSetup(imageSize) + err = ldh.Map(v.dev.HostPath, v.dmName(), imageSize) } if err == nil && !headerMatches { @@ -203,12 +94,11 @@ func (v *persistentRootVolume) Setup() (*libvirtxml.DomainDisk, *libvirtxml.Doma return &libvirtxml.DomainDisk{ Device: "disk", - Source: &libvirtxml.DomainDiskSource{Block: &libvirtxml.DomainDiskSourceBlock{Dev: v.dmPath()}}, //hostPath}}, + Source: &libvirtxml.DomainDiskSource{Block: &libvirtxml.DomainDiskSourceBlock{Dev: v.dmPath()}}, Driver: &libvirtxml.DomainDiskDriver{Name: "qemu", Type: "raw"}, }, nil, nil } func (v *persistentRootVolume) Teardown() error { - _, err := v.owner.Commander().Command("dmsetup", "remove", v.dmName()).Run(nil) - return err + return v.devHandler().Unmap(v.dmName()) } diff --git a/pkg/libvirttools/persistentroot_volumesource_test.go b/pkg/libvirttools/persistentroot_volumesource_test.go index df48a61ff..5a408fc65 100644 --- a/pkg/libvirttools/persistentroot_volumesource_test.go +++ b/pkg/libvirttools/persistentroot_volumesource_test.go @@ -17,7 +17,6 @@ limitations under the License. package libvirttools import ( - "io/ioutil" "os" "path/filepath" "strconv" @@ -27,6 +26,7 @@ import ( "github.com/Mirantis/virtlet/tests/gm" digest "github.com/opencontainers/go-digest" + fakeblockdev "github.com/Mirantis/virtlet/pkg/blockdev/fake" "github.com/Mirantis/virtlet/pkg/metadata/types" fakeutils "github.com/Mirantis/virtlet/pkg/utils/fake" testutils "github.com/Mirantis/virtlet/pkg/utils/testing" @@ -98,75 +98,55 @@ func TestPersistentRootVolume(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - rec := testutils.NewToplevelRecorder() - im := newFakeImageManager(rec.Child("image"), fakeImages...) - if tc.fileSize%512 != 0 { - t.Fatalf("block device size must be a multiple of 512") - } - - tmpDir, err := ioutil.TempDir("", "fake-persistent-rootfs") - if err != nil { - t.Fatalf("TempDir(): %v", err) - } - defer os.RemoveAll(tmpDir) - fakeDevDir := filepath.Join(tmpDir, "__dev__") - if err := os.Mkdir(fakeDevDir, 0777); err != nil { - t.Fatalf("Mkdir(): %v", err) - } - - devPath := filepath.Join(fakeDevDir, "rootdev") - devFile, err := os.Create(devPath) - if err != nil { - t.Fatalf("Create(): %v", err) - } - if _, err := devFile.Write(make([]byte, tc.fileSize)); err != nil { - devFile.Close() - t.Fatalf("Write(): %v", err) - } - if err := devFile.Close(); err != nil { - t.Fatalf("devFile.Close()") - } - devPathToUse := devPath - if tc.useSymlink { - devPathToUse := filepath.Join(fakeDevDir, "rootdevlink") - if err := os.Symlink(devPath, devPathToUse); err != nil { - t.Fatalf("Symlink(): %v", err) + fakeblockdev.WithFakeRootDev(t, tc.fileSize, func(devPath, devDir string) { + rec := testutils.NewToplevelRecorder() + im := newFakeImageManager(rec.Child("image"), fakeImages...) + if tc.fileSize%512 != 0 { + t.Fatalf("block device size must be a multiple of 512") } - } - for n, imageName := range []string{tc.imageName, tc.secondImageName} { - if imageName == "" { - continue + devPathToUse := devPath + if tc.useSymlink { + devPathToUse = filepath.Join(devDir, "rootdevlink") + if err := os.Symlink(devPath, devPathToUse); err != nil { + t.Fatalf("Symlink(): %v", err) + } } - cmdSpecs := []fakeutils.CmdSpec{ - { - Match: "blockdev --getsz", - Stdout: strconv.Itoa(int(tc.fileSize / 512)), - }, - { - Match: "dmsetup create", - }, - { - Match: "dmsetup remove", - }, - } - if n == 0 || tc.imageWrittenAgain { - // qemu-img convert is used to write the image to the block device. - // It should only be called if the image changes. - cmdSpecs = append(cmdSpecs, fakeutils.CmdSpec{ - Match: "qemu-img convert", - }) - } - cmd := fakeutils.NewCommander(rec, cmdSpecs) - cmd.ReplaceTempPath("__dev__", "/dev") - owner := newFakeVolumeOwner(nil, im, cmd) - rootVol := getPersistentRootVolume(t, imageName, devPathToUse, owner) - verifyRootVolumeSetup(t, rec, rootVol, tc.errors[n]) - if tc.errors[n] == "" { - verifyRootVolumeTeardown(t, rec, rootVol) + + for n, imageName := range []string{tc.imageName, tc.secondImageName} { + if imageName == "" { + continue + } + cmdSpecs := []fakeutils.CmdSpec{ + { + Match: "blockdev --getsz", + Stdout: strconv.Itoa(int(tc.fileSize / 512)), + }, + { + Match: "dmsetup create", + }, + { + Match: "dmsetup remove", + }, + } + if n == 0 || tc.imageWrittenAgain { + // qemu-img convert is used to write the image to the block device. + // It should only be called if the image changes. + cmdSpecs = append(cmdSpecs, fakeutils.CmdSpec{ + Match: "qemu-img convert", + }) + } + cmd := fakeutils.NewCommander(rec, cmdSpecs) + cmd.ReplaceTempPath("__dev__", "/dev") + owner := newFakeVolumeOwner(nil, im, cmd) + rootVol := getPersistentRootVolume(t, imageName, devPathToUse, owner) + verifyRootVolumeSetup(t, rec, rootVol, tc.errors[n]) + if tc.errors[n] == "" { + verifyRootVolumeTeardown(t, rec, rootVol) + } } - } - gm.Verify(t, gm.NewYamlVerifier(rec.Content())) + gm.Verify(t, gm.NewYamlVerifier(rec.Content())) + }) }) } } @@ -223,8 +203,8 @@ func verifyRootVolumeTeardown(t *testing.T, rec testutils.Recorder, rootVol *per func getPersistentRootVolume(t *testing.T, imageName, devHostPath string, owner volumeOwner) *persistentRootVolume { volumes, err := GetRootVolume( &types.VMConfig{ - PodSandboxID: testUUID, - Image: imageName, + DomainUUID: testUUID, + Image: imageName, VolumeDevices: []types.VMVolumeDevice{ { DevicePath: "/", diff --git a/pkg/utils/fake/command.go b/pkg/utils/fake/command.go index 12468eecf..d5f145da1 100644 --- a/pkg/utils/fake/command.go +++ b/pkg/utils/fake/command.go @@ -53,20 +53,20 @@ func (c *fakeCommand) Run(stdin []byte) ([]byte, error) { r := map[string]string{ "cmd": fullCmd, } - defer c.rec.Rec("CMD", r) + if c.rec != nil { + defer c.rec.Rec("CMD", r) + } for _, spec := range c.commander.specs { matched, err := regexp.MatchString(spec.Match, fullCmd) if err != nil { return nil, fmt.Errorf("failed to match regexp %q: %v", spec.Match, err) } if matched { - if c.rec != nil { - if stdin != nil { - r["stdin"] = c.subst(string(stdin)) - } - if spec.Stdout != "" { - r["stdout"] = spec.Stdout - } + if stdin != nil { + r["stdin"] = c.subst(string(stdin)) + } + if spec.Stdout != "" { + r["stdout"] = spec.Stdout } return []byte(spec.Stdout), nil }