Use Image.Next() in converter

Use Image.Next() to skip zero extents instead of reading the data and detecting zeros. We still detect zeros in non-zero extents, so that areas full of actual zeros are converted to unallocated areas in the target image.

This does not change the performance much for the Ubuntu image, since most of the time is spent reading and decompressing the actual data. Converting a large empty image is 2 orders of magnitude faster, so I'm now testing with a 1 TiB image instead of a 100 GiB image.

Example run with a 1 TiB empty image:

    % hyperfine -w3 "qemu-img convert -f qcow2 -O raw -W /tmp/images/test.0p.qcow2 /tmp/tmp.img" \
                    "./go-qcow2reader-example convert /tmp/images/test.0p.qcow2 /tmp/tmp.img"
    Benchmark 1: qemu-img convert -f qcow2 -O raw -W /tmp/images/test.0p.qcow2 /tmp/tmp.img
      Time (mean ± σ): 14.0 ms ± 0.4 ms [User: 11.8 ms, System: 2.0 ms]
      Range (min … max): 13.5 ms … 17.8 ms 181 runs

    Benchmark 2: ./go-qcow2reader-example convert /tmp/images/test.0p.qcow2 /tmp/tmp.img
      Time (mean ± σ): 20.6 ms ± 0.2 ms [User: 118.9 ms, System: 2.2 ms]
      Range (min … max): 20.4 ms … 21.7 ms 130 runs

    Summary
      qemu-img convert -f qcow2 -O raw -W /tmp/images/test.0p.qcow2 /tmp/tmp.img ran
        1.48 ± 0.04 times faster than ./go-qcow2reader-example convert /tmp/images/test.0p.qcow2 /tmp/tmp.img

qemu-img is faster, but a 7 millisecond difference for a 1 TiB image is not very interesting.

Signed-off-by: Nir Soffer <[email protected]>
lima-vm · Nov 3, 2024 · 15fae59 · 15fae59
1 parent 245d313
commit 15fae59
Showing 1 changed file with 49 additions and 30 deletions.
diff --git a/convert/convert.go b/convert/convert.go
@@ -6,6 +6,8 @@ import (
 	"fmt"
 	"io"
 	"sync"
+
+	"github.com/lima-vm/go-qcow2reader/image"
 )
 
 const BufferSize = 1024 * 1024
@@ -129,10 +131,10 @@ func (c *Converter) reset(size int64) {
 	c.offset = 0
 }
 
-// Convert copy size bytes from io.ReaderAt to io.WriterAt. Unallocated areas or
-// areas full of zeros in the source are keep unallocated in the destination.
-// The destination must be new empty or full of zeroes.
-func (c *Converter) Convert(wa io.WriterAt, ra io.ReaderAt, size int64) error {
+// Convert copies size bytes from image to io.WriterAt. Unallocated areas or
+// areas full of zeros in the source are kept unallocated in the destination.
+// The destination must be new and empty, or full of zeroes.
+func (c *Converter) Convert(wa io.WriterAt, img image.Image, size int64) error {
 	c.reset(size)
 
 	zero := make([]byte, c.bufferSize)
@@ -151,40 +153,57 @@ func (c *Converter) Convert(wa io.WriterAt, ra io.ReaderAt, size int64) error {
 				}
 
 				for start < end {
-					// The last read may be shorter.
-					n := len(buf)
-					if end-start < int64(len(buf)) {
-						n = int(end - start)
+					// Get next extent in this segment.
+					extent, err := img.Next(start, end-start)
+					if err != nil {
+						c.setError(err)
+						return
+					}
+					if extent.Zero {
+						start += extent.Length
+						continue
 					}
 
-					// Read more data.
-					nr, err := ra.ReadAt(buf[:n], start)
-					if err != nil {
-						if !errors.Is(err, io.EOF) {
-							c.setError(err)
-							return
+					// Consume data from this extent.
+					for extent.Length > 0 {
+						// The last read may be shorter.
+						n := len(buf)
+						if extent.Length < int64(len(buf)) {
+							n = int(extent.Length)
 						}
 
-						// EOF for the last read of the last segment is expected, but since we
-						// read exactly size bytes, we should never get a zero read.
-						if nr == 0 {
-							c.setError(errors.New("unexpected EOF"))
-							return
+						// Read more data.
+						nr, err := img.ReadAt(buf[:n], start)
+						if err != nil {
+							if !errors.Is(err, io.EOF) {
+								c.setError(err)
+								return
+							}
+
+							// EOF for the last read of the last segment is expected, but since we
+							// read exactly size bytes, we should never get a zero read.
+							if nr == 0 {
+								c.setError(errors.New("unexpected EOF"))
+								return
+							}
 						}
-					}
 
-					// If the data is all zeros we skip it to create a hole. Otherwise
-					// write the data.
-					if !bytes.Equal(buf[:nr], zero[:nr]) {
-						if nw, err := wa.WriteAt(buf[:nr], start); err != nil {
-							c.setError(err)
-							return
-						} else if nw != nr {
-							c.setError(fmt.Errorf("read %d, but wrote %d bytes", nr, nw))
-							return
+						// If the data is all zeros we skip it to create a hole. Otherwise
+						// write the data.
+						if !bytes.Equal(buf[:nr], zero[:nr]) {
+							if nw, err := wa.WriteAt(buf[:nr], start); err != nil {
+								c.setError(err)
+								return
+							} else if nw != nr {
+								c.setError(fmt.Errorf("read %d, but wrote %d bytes", nr, nw))
+								return
+							}
 						}
+
+						extent.Length -= int64(nr)
+						extent.Start += int64(nr)
+						start += int64(nr)
 					}
-					start += int64(nr)
 				}
 			}
 		}()