Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Image.Extent() interface #38

Merged
merged 9 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion cmd/go-qcow2reader-example/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"os"

"github.com/cheggaaa/pb/v3"
"github.com/lima-vm/go-qcow2reader"
"github.com/lima-vm/go-qcow2reader/convert"
"github.com/lima-vm/go-qcow2reader/log"
Expand Down Expand Up @@ -76,7 +77,12 @@ func cmdConvert(args []string) error {
if err != nil {
return err
}
if err := c.Convert(t, img, img.Size()); err != nil {

bar := newProgressBar(img.Size())
bar.Start()
defer bar.Finish()

if err := c.Convert(t, img, img.Size(), bar); err != nil {
return err
}

Expand All @@ -86,3 +92,18 @@ func cmdConvert(args []string) error {

return t.Close()
}

// progressBar adapts pb.ProgressBar to the Updater interface.
type progressBar struct {
*pb.ProgressBar
}

func newProgressBar(size int64) *progressBar {
b := &progressBar{pb.New64(size)}
b.Set(pb.Bytes, true)
return b
}

func (b *progressBar) Update(n int64) {
b.ProgressBar.Add64(n)
}
11 changes: 11 additions & 0 deletions cmd/go-qcow2reader-example/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,19 @@ module github.com/lima-vm/go-qcow2reader/cmd/go-qcow2reader-example
go 1.22

require (
github.com/cheggaaa/pb/v3 v3.1.5
github.com/klauspost/compress v1.16.5
github.com/lima-vm/go-qcow2reader v0.0.0-00010101000000-000000000000
)

require (
github.com/VividCortex/ewma v1.2.0 // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
golang.org/x/sys v0.6.0 // indirect
)

replace github.com/lima-vm/go-qcow2reader => ../../
18 changes: 18 additions & 0 deletions cmd/go-qcow2reader-example/go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,20 @@
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/cheggaaa/pb/v3 v3.1.5 h1:QuuUzeM2WsAqG2gMqtzaWithDJv0i+i6UlnwSCI4QLk=
github.com/cheggaaa/pb/v3 v3.1.5/go.mod h1:CrxkeghYTXi1lQBEI7jSn+3svI3cuc19haAj6jM60XI=
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
github.com/klauspost/compress v1.16.5 h1:IFV2oUNUzZaz+XyusxpLzpzS8Pt5rh0Z16For/djlyI=
github.com/klauspost/compress v1.16.5/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
5 changes: 4 additions & 1 deletion cmd/go-qcow2reader-example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ func usage() {
Available commands:
info show image information
read read image data and print to stdout
convert convert image to raw format
convert convert image to raw format
map print image extents
`
fmt.Fprintf(os.Stderr, usage, os.Args[0])
os.Exit(1)
Expand Down Expand Up @@ -71,6 +72,8 @@ func main() {
err = cmdRead(args)
case "convert":
err = cmdConvert(args)
case "map":
err = cmdMap(args)
default:
usage()
}
Expand Down
73 changes: 73 additions & 0 deletions cmd/go-qcow2reader-example/map.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package main

import (
"bufio"
"encoding/json"
"errors"
"flag"
"fmt"
"os"

"github.com/lima-vm/go-qcow2reader"
"github.com/lima-vm/go-qcow2reader/log"
)

func cmdMap(args []string) error {
var (
// Required
filename string

// Options
debug bool
)

fs := flag.NewFlagSet("map", flag.ExitOnError)
fs.Usage = func() {
fmt.Fprintf(fs.Output(), "Usage: %s map [OPTIONS...] FILE\n", os.Args[0])
flag.PrintDefaults()
}
fs.BoolVar(&debug, "debug", false, "enable printing debug messages")
if err := fs.Parse(args); err != nil {
return err
}

if debug {
log.SetDebugFunc(logDebug)
}

switch len(fs.Args()) {
case 0:
return errors.New("no file was specified")
case 1:
filename = fs.Arg(0)
default:
return errors.New("too many files were specified")
}

f, err := os.Open(filename)
if err != nil {
return err
}
defer f.Close()

img, err := qcow2reader.Open(f)
if err != nil {
return err
}
defer img.Close()

writer := bufio.NewWriter(os.Stdout)
encoder := json.NewEncoder(writer)

var start int64
end := img.Size()
for start < end {
extent, err := img.Extent(start, end-start)
if err != nil {
return err
}
encoder.Encode(extent)
start += extent.Length
}
return writer.Flush()
}
112 changes: 79 additions & 33 deletions convert/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,29 @@ import (
"fmt"
"io"
"sync"

"github.com/lima-vm/go-qcow2reader/image"
)

// The size of the buffer used to read data from non-zero extents of the image.
// For best performance, the size should be aligned to the image cluster size or
// the file system block size.
const BufferSize = 1024 * 1024

// Smaller value may increase the overhead of synchornizing multiple works.
// To maxmimze performance we use multiple goroutines to read data from the
// source image, decompress data, and write data to the target image. To
// schedule the work to multiple goroutines, the image is split to multiple
// segments, each processed by a single worker goroutine.
//
// Smaller value may increase the overhead of synchornizing multiple workers.
// Larger value may be less efficient for smaller images. The default value
// gives good results for the lima default Ubuntu image.
// gives good results for the lima default Ubuntu image. Must be aligned to
// BufferSize.
const SegmentSize = 32 * BufferSize

// For best I/O throughput we want to have enough in-flight requests, regardless
// of number of cores. For best decompression we want to use one worker per
// core, but too many workers is less effective. The default value gives good
// core, but too many workers are less effective. The default value gives good
// results with lima default Ubuntu image.
const Workers = 8

Expand Down Expand Up @@ -67,6 +78,14 @@ func (o *Options) Validate() error {
return nil
}

// Updater is an interface for tracking conversion progress.
type Updater interface {
// Called from multiple goroutines after a byte range of length was converted.
// If the conversion is successfu, the total number of bytes will be the image
// virtual size.
Update(n int64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can n be negative?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, but this is the type the go and the kernel use for file size (off_t). It is easier to use when we can use Extent.Length when updating.

We an add a check for n < 0 in the progress bar example implementation.

}

type Converter struct {
// Read only after starting.
size int64
Expand Down Expand Up @@ -129,10 +148,13 @@ func (c *Converter) reset(size int64) {
c.offset = 0
}

// Convert copy size bytes from io.ReaderAt to io.WriterAt. Unallocated areas or
// areas full of zeros in the source are keep unallocated in the destination.
// The destination must be new empty or full of zeroes.
func (c *Converter) Convert(wa io.WriterAt, ra io.ReaderAt, size int64) error {
// Convert copy size bytes from image to io.WriterAt. Unallocated extents in the
// source image or read data which is all zeros are converted to unallocated
// byte range in the target image. The target image must be new empty file or a
// file full of zeroes. To get a sparse target image, the image must be a new
// empty file, since Convert does not punch holes for zero ranges even if the
// underlying file system supports hole punching.
func (c *Converter) Convert(wa io.WriterAt, img image.Image, size int64, progress Updater) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

progress Updater could be provided in Options?
Then we would not have to change the function signature?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is possible, but even if we do we accept now an image instead of a ReaderAt and size.

Also progress scope is one convert, while convert instance can convert many images using the same options, so it is better to accept progress per convert operation.

The options are not something you change per image, it may change depending on the machine (e.g number of cores).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another way to accept options to convert(), and use the zero value of the converter:

var c convert.Converter
c.Convert(target, src, convert.Options{Progress: bar})

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There other options that we may want to add in the future that may be per image options, so moving the options to Convert makes sense anyway.

c.reset(size)

zero := make([]byte, c.bufferSize)
Expand All @@ -151,40 +173,64 @@ func (c *Converter) Convert(wa io.WriterAt, ra io.ReaderAt, size int64) error {
}

for start < end {
// The last read may be shorter.
n := len(buf)
if end-start < int64(len(buf)) {
n = int(end - start)
// Get next extent in this segment.
nirs marked this conversation as resolved.
Show resolved Hide resolved
extent, err := img.Extent(start, end-start)
if err != nil {
c.setError(err)
return
}
if extent.Zero {
start += extent.Length
nirs marked this conversation as resolved.
Show resolved Hide resolved
if progress != nil {
progress.Update(extent.Length)
}
continue
}

// Read more data.
nr, err := ra.ReadAt(buf[:n], start)
if err != nil {
if !errors.Is(err, io.EOF) {
c.setError(err)
return
// Consume data from this extent.
for extent.Length > 0 {
// The last read may be shorter.
n := len(buf)
if extent.Length < int64(len(buf)) {
n = int(extent.Length)
}

// EOF for the last read of the last segment is expected, but since we
// read exactly size bytes, we should never get a zero read.
if nr == 0 {
c.setError(errors.New("unexpected EOF"))
return
// Read more data.
nr, err := img.ReadAt(buf[:n], start)
if err != nil {
if !errors.Is(err, io.EOF) {
c.setError(err)
return
}

// EOF for the last read of the last segment is expected, but since we
// read exactly size bytes, we should never get a zero read.
if nr == 0 {
c.setError(errors.New("unexpected EOF"))
return
}
}
}

// If the data is all zeros we skip it to create a hole. Otherwise
// write the data.
if !bytes.Equal(buf[:nr], zero[:nr]) {
if nw, err := wa.WriteAt(buf[:nr], start); err != nil {
c.setError(err)
return
} else if nw != nr {
c.setError(fmt.Errorf("read %d, but wrote %d bytes", nr, nw))
return
// If the data is all zeros we skip it to create a hole. Otherwise
// write the data.
if !bytes.Equal(buf[:nr], zero[:nr]) {
if nw, err := wa.WriteAt(buf[:nr], start); err != nil {
c.setError(err)
return
} else if nw != nr {
c.setError(fmt.Errorf("read %d, but wrote %d bytes", nr, nw))
return
}
}

if progress != nil {
progress.Update(int64(nr))
}

extent.Length -= int64(nr)
extent.Start += int64(nr)
start += int64(nr)
nirs marked this conversation as resolved.
Show resolved Hide resolved
}
start += int64(nr)
}
}
}()
Expand Down
18 changes: 18 additions & 0 deletions image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,28 @@ import (
// Type must be a "Backing file format name string" that appears in QCOW2.
type Type string

// Extent describes a byte range in the image with the same allocation,
// compression, or zero status. Extents are aligned to the underlying file
// system block size (e.g. 4k), or the image format cluster size (e.g. 64k). One
// extent can describe one or more file system blocks or image clusters.
type Extent struct {
nirs marked this conversation as resolved.
Show resolved Hide resolved
// Offset from start of the image in bytes.
Start int64 `json:"start"`
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not strictly needed, since when calling Next(start, length) we always return the requested start value.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing Start makes testing easier and eliminate the error of invalid extents list. When consuming the extents we keep current offset externally so we don't really need it. When printing extents having start is helpful. If we don't report it we have to add another type with Start for encoding json.

// Length of this extent in bytes.
Length int64 `json:"length"`
// Set if this extent is allocated.
Allocated bool `json:"allocated"`
// Set if this extent is read as zeros.
Zero bool `json:"zero"`
// Set if this extent is compressed.
Compressed bool `json:"compressed"`
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Allocated and Compressed are not needed for sparse copy, only Zero is needed. They can be useful for other purposes, for example dumping all extents like qemu-img map --output json.

}

// Image implements [io.ReaderAt] and [io.Closer].
type Image interface {
io.ReaderAt
io.Closer
Extent(start, length int64) (Extent, error)
Type() Type
Size() int64 // -1 if unknown
Readable() error
Expand Down
Loading
Loading