Skip to content

Commit

Permalink
mantle/qemu: Support catching initramfs failures
Browse files Browse the repository at this point in the history
Pairs with coreos/ignition-dracut#146

This way, we error out fast if something went wrong in the initramfs
rather than timing out.  And further, we get the journal as JSON,
so we can do something intelligent in the future to analyze it.
  • Loading branch information
cgwalters committed Mar 27, 2020
1 parent 9552e83 commit 638ad99
Show file tree
Hide file tree
Showing 13 changed files with 138 additions and 7 deletions.
18 changes: 13 additions & 5 deletions mantle/cmd/kola/qemuexec.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ var (

ignitionFragments []string

forceConfigInjection bool
forceConfigInjection bool
propagateInitramfsFailure bool
)

func init() {
Expand All @@ -63,6 +64,8 @@ func init() {
cmdQemuExec.Flags().IntVarP(&memory, "memory", "m", 0, "Memory in MB")
cmdQemuExec.Flags().StringVarP(&ignition, "ignition", "i", "", "Path to ignition config")
cmdQemuExec.Flags().BoolVarP(&forceConfigInjection, "inject-ignition", "", false, "Force injecting Ignition config using guestfs")
cmdQemuExec.Flags().BoolVar(&propagateInitramfsFailure, "propagate-initramfs-failure", false, "Error out if the system fails in the initramfs")

}

func renderFragments(config v3types.Config) (*v3types.Config, error) {
Expand Down Expand Up @@ -141,8 +144,13 @@ func runQemuExec(cmd *cobra.Command, args []string) error {
return err
}

// Ignore errors
_ = inst.Wait()

return nil
if propagateInitramfsFailure {
err = inst.WaitAll()
if err != nil {
return err
}
return nil
} else {
return inst.Wait()
}
}
2 changes: 1 addition & 1 deletion mantle/cmd/kola/testiso.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ func testPXE(inst platform.Install, completionfile string) error {
}
defer mach.Destroy()

err = mach.QemuInst.Wait()
err = mach.QemuInst.WaitAll()
if err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions mantle/platform/machine/aws/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ func (am *machine) SSH(cmd string) ([]byte, []byte, error) {
return am.cluster.SSH(am, cmd)
}

// IgnitionError always returns nil: this machine type performs no
// detection of Ignition failures.
func (am *machine) IgnitionError() error {
return nil
}

// Reboot delegates to platform.RebootMachine, passing this machine and
// its journal, and returns that call's error.
func (am *machine) Reboot() error {
return platform.RebootMachine(am, am.journal)
}
Expand Down
4 changes: 4 additions & 0 deletions mantle/platform/machine/azure/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ func (am *machine) SSH(cmd string) ([]byte, []byte, error) {
return am.cluster.SSH(am, cmd)
}

// IgnitionError always returns nil: this machine type performs no
// detection of Ignition failures.
func (am *machine) IgnitionError() error {
return nil
}

// Re-fetch the Public & Private IP address for the event that it's changed during the reboot
func (am *machine) refetchIPs() error {
var err error
Expand Down
4 changes: 4 additions & 0 deletions mantle/platform/machine/do/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ func (dm *machine) SSH(cmd string) ([]byte, []byte, error) {
return dm.cluster.SSH(dm, cmd)
}

// IgnitionError always returns nil: this machine type performs no
// detection of Ignition failures.
func (dm *machine) IgnitionError() error {
return nil
}

// Reboot delegates to platform.RebootMachine, passing this machine and
// its journal, and returns that call's error.
func (dm *machine) Reboot() error {
return platform.RebootMachine(dm, dm.journal)
}
Expand Down
4 changes: 4 additions & 0 deletions mantle/platform/machine/esx/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ func (em *machine) SSH(cmd string) ([]byte, []byte, error) {
return em.cluster.SSH(em, cmd)
}

// IgnitionError always returns nil: this machine type performs no
// detection of Ignition failures.
func (em *machine) IgnitionError() error {
return nil
}

// Reboot delegates to platform.RebootMachine, passing this machine and
// its journal, and returns that call's error.
func (em *machine) Reboot() error {
return platform.RebootMachine(em, em.journal)
}
Expand Down
4 changes: 4 additions & 0 deletions mantle/platform/machine/gcloud/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ func (gm *machine) SSH(cmd string) ([]byte, []byte, error) {
return gm.gc.SSH(gm, cmd)
}

// IgnitionError always returns nil: this machine type performs no
// detection of Ignition failures.
func (gm *machine) IgnitionError() error {
return nil
}

// Reboot delegates to platform.RebootMachine, passing this machine and
// its journal, and returns that call's error.
func (gm *machine) Reboot() error {
return platform.RebootMachine(gm, gm.journal)
}
Expand Down
4 changes: 4 additions & 0 deletions mantle/platform/machine/openstack/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ func (om *machine) SSH(cmd string) ([]byte, []byte, error) {
return om.cluster.SSH(om, cmd)
}

// IgnitionError always returns nil: this machine type performs no
// detection of Ignition failures.
func (om *machine) IgnitionError() error {
return nil
}

// Reboot delegates to platform.RebootMachine, passing this machine and
// its journal, and returns that call's error.
func (om *machine) Reboot() error {
return platform.RebootMachine(om, om.journal)
}
Expand Down
4 changes: 4 additions & 0 deletions mantle/platform/machine/packet/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ func (pm *machine) SSH(cmd string) ([]byte, []byte, error) {
return pm.cluster.SSH(pm, cmd)
}

// IgnitionError always returns nil: this machine type performs no
// detection of Ignition failures.
func (pm *machine) IgnitionError() error {
return nil
}

// Reboot delegates to platform.RebootMachine, passing this machine and
// its journal, and returns that call's error.
func (pm *machine) Reboot() error {
return platform.RebootMachine(pm, pm.journal)
}
Expand Down
10 changes: 10 additions & 0 deletions mantle/platform/machine/unprivqemu/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package unprivqemu

import (
"fmt"
"io/ioutil"
"time"

Expand Down Expand Up @@ -61,6 +62,15 @@ func (m *machine) SSH(cmd string) ([]byte, []byte, error) {
return m.qc.SSH(m, cmd)
}

// IgnitionError blocks in WaitIgnitionError on the underlying qemu
// instance. It returns that call's error if there is one, an
// "entered emergency.target" error if any journal data was captured,
// and nil if the call succeeded without capturing anything.
func (m *machine) IgnitionError() error {
	buf, err := m.inst.WaitIgnitionError()
	if err != nil {
		return err
	}
	// An empty buffer means no failure journal was received; treat it
	// the same way QemuInstance.WaitAll does rather than reporting a
	// failure that did not happen.
	if buf == "" {
		return nil
	}
	// TODO render buf
	return fmt.Errorf("entered emergency.target in initramfs")
}

// Reboot delegates to platform.RebootMachine, passing this machine and
// its journal, and returns that call's error.
func (m *machine) Reboot() error {
return platform.RebootMachine(m, m.journal)
}
Expand Down
3 changes: 3 additions & 0 deletions mantle/platform/platform.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ type Machine interface {
// ID returns the platform-specific machine identifier.
ID() string

// IgnitionError returns an error if the machine failed in Ignition
IgnitionError() error

// IP returns the machine's public IP.
IP() string

Expand Down
70 changes: 70 additions & 0 deletions mantle/platform/qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package platform

import (
"bufio"
"encoding/json"
"fmt"
"io"
Expand Down Expand Up @@ -51,6 +52,8 @@ type QemuInstance struct {
tmpConfig string
swtpmTmpd string
swtpm exec.Cmd

journalPipe *os.File
}

func (inst *QemuInstance) Pid() int {
Expand Down Expand Up @@ -115,14 +118,70 @@ func (inst *QemuInstance) SSHAddress() (string, error) {
return "", fmt.Errorf("didn't find an address")
}

// Wait blocks until the qemu process exits and returns the result of
// waiting on it.
func (inst *QemuInstance) Wait() error {
return inst.qemu.Wait()
}

// WaitIgnitionError blocks until data arrives on the journal pipe, which
// is only expected when the instance failed inside the initramfs. It then
// reads lines until it sees an empty line or a bare "{}" record.
//
// The returned string is a newline-delimited stream of JSON strings, as
// returned by `journalctl -o json`; every complete line read is terminated
// with '\n'.
//
// A non-nil error is returned if reading from the pipe fails (whatever was
// collected so far is returned alongside it), or if any line exceeded the
// read buffer.
func (inst *QemuInstance) WaitIgnitionError() (string, error) {
	b := bufio.NewReaderSize(inst.journalPipe, 64768)
	var r strings.Builder
	iscorrupted := false
	// Peek blocks until at least one byte is available, without consuming it.
	if _, err := b.Peek(1); err != nil {
		return "", errors.Wrapf(err, "Reading from journal")
	}
	for {
		line, prefix, err := b.ReadLine()
		if err != nil {
			return r.String(), errors.Wrapf(err, "Reading from journal channel")
		}
		if prefix {
			// The line did not fit in the buffer; remember that so the
			// caller is told the stream is unreliable.
			iscorrupted = true
		}
		if len(line) == 0 || string(line) == "{}" {
			break
		}
		r.Write(line)
		// ReadLine strips the line ending. Put it back once the whole line
		// has been consumed so that records stay newline-delimited instead
		// of being run together.
		if !prefix {
			r.WriteByte('\n')
		}
	}
	if iscorrupted {
		return r.String(), fmt.Errorf("journal was truncated due to overly long line")
	}
	return r.String(), nil
}

// WaitAll waits concurrently for the qemu process to exit (Wait) and for
// an initramfs failure report (WaitIgnitionError). It returns the first
// result delivered: the error from WaitIgnitionError, an
// "entered emergency.target" error if journal data was captured, or
// whatever Wait returns (nil on a clean exit).
func (inst *QemuInstance) WaitAll() error {
	// Only the first result is consumed. The channel has room for both
	// senders so that the slower goroutine can deliver its value and exit
	// instead of blocking forever on a send nobody receives.
	c := make(chan error, 2)
	go func() {
		buf, err := inst.WaitIgnitionError()
		if err != nil {
			c <- err
			return
		}
		// TODO parse buf and try to nicely render something
		if buf != "" {
			c <- fmt.Errorf("entered emergency.target in initramfs")
		}
	}()
	go func() {
		c <- inst.Wait()
	}()
	return <-c
}

func (inst *QemuInstance) Destroy() {
if inst.tmpConfig != "" {
os.Remove(inst.tmpConfig)
}
if inst.journalPipe != nil {
inst.journalPipe.Close()
}
if inst.qemu != nil {
if err := inst.qemu.Kill(); err != nil {
plog.Errorf("Error killing qemu instance %v: %v", inst.Pid(), err)
Expand Down Expand Up @@ -728,6 +787,17 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) {
"-tpmdev", "emulator,id=tpm0,chardev=chrtpm", "-device", "tpm-tis,tpmdev=tpm0")
}

// Set up the virtio channel to get Ignition failures by default
journalPipeR, journalPipeW, err := os.Pipe()
if err != nil {
return nil, errors.Wrapf(err, "creating journal pipe")
}
inst.journalPipe = journalPipeR
argv = append(argv, "-device", "virtio-serial")
// https://www.redhat.com/archives/libvir-list/2015-December/msg00305.html
argv = append(argv, "-chardev", fmt.Sprintf("file,id=ignition-dracut,path=%s,append=on", builder.AddFd(journalPipeW)))
argv = append(argv, "-device", "virtserialport,chardev=ignition-dracut,name=com.coreos.ignition.journal")

fdnum := 3 // first additional file starts at position 3
for i, _ := range builder.fds {
fdset := i + 1 // Start at 1
Expand Down
14 changes: 13 additions & 1 deletion mantle/platform/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,19 @@ func StartMachineAfterReboot(m Machine, j *Journal, oldBootId string) error {

// StartMachine will start a given machine, provided the machine's journal.
//
// It runs two checks concurrently and returns the first result delivered:
// a non-nil error from m.IgnitionError(), or the result of
// StartMachineAfterReboot (nil on success). A nil result from
// IgnitionError is not reported, so it never masks the outcome of
// StartMachineAfterReboot.
func StartMachine(m Machine, j *Journal) error {
	// Only the first result is consumed. The channel has room for both
	// senders so that the slower goroutine can deliver its value and exit
	// instead of blocking forever on a send nobody receives.
	errchan := make(chan error, 2)
	go func() {
		err := m.IgnitionError()
		if err != nil {
			plog.Infof("machine %s entered emergency.target in initramfs: %v", m.ID(), err)
			errchan <- err
		}
	}()
	go func() {
		// This one ends up connecting to the journal via ssh
		errchan <- StartMachineAfterReboot(m, j, "")
	}()
	return <-errchan
}

func GetMachineBootId(m Machine) (string, error) {
Expand Down

0 comments on commit 638ad99

Please sign in to comment.