diff --git a/mantle/cmd/kola/qemuexec.go b/mantle/cmd/kola/qemuexec.go index 7fa62c4b35..996a4cf240 100644 --- a/mantle/cmd/kola/qemuexec.go +++ b/mantle/cmd/kola/qemuexec.go @@ -50,7 +50,8 @@ var ( ignitionFragments []string - forceConfigInjection bool + forceConfigInjection bool + propagateInitramfsFailure bool ) func init() { @@ -63,6 +64,8 @@ func init() { cmdQemuExec.Flags().IntVarP(&memory, "memory", "m", 0, "Memory in MB") cmdQemuExec.Flags().StringVarP(&ignition, "ignition", "i", "", "Path to ignition config") cmdQemuExec.Flags().BoolVarP(&forceConfigInjection, "inject-ignition", "", false, "Force injecting Ignition config using guestfs") + cmdQemuExec.Flags().BoolVar(&propagateInitramfsFailure, "propagate-initramfs-failure", false, "Error out if the system fails in the initramfs") + } func renderFragments(config v3types.Config) (*v3types.Config, error) { @@ -141,8 +144,13 @@ func runQemuExec(cmd *cobra.Command, args []string) error { return err } - // Ignore errors - _ = inst.Wait() - - return nil + if propagateInitramfsFailure { + err = inst.WaitAll() + if err != nil { + return err + } + return nil + } else { + return inst.Wait() + } } diff --git a/mantle/cmd/kola/testiso.go b/mantle/cmd/kola/testiso.go index dbaeeebd31..9d49f255b6 100644 --- a/mantle/cmd/kola/testiso.go +++ b/mantle/cmd/kola/testiso.go @@ -202,7 +202,7 @@ func testPXE(inst platform.Install, completionfile string) error { } defer mach.Destroy() - err = mach.QemuInst.Wait() + err = mach.QemuInst.WaitAll() if err != nil { return err } diff --git a/mantle/platform/machine/aws/machine.go b/mantle/platform/machine/aws/machine.go index f97705d7a4..e178917605 100644 --- a/mantle/platform/machine/aws/machine.go +++ b/mantle/platform/machine/aws/machine.go @@ -64,6 +64,10 @@ func (am *machine) SSH(cmd string) ([]byte, []byte, error) { return am.cluster.SSH(am, cmd) } +func (am *machine) IgnitionError() error { + return nil +} + func (am *machine) Reboot() error { return platform.RebootMachine(am, am.journal) } diff --git a/mantle/platform/machine/azure/machine.go b/mantle/platform/machine/azure/machine.go index b9ef0002dd..3d58847a41 100644 --- a/mantle/platform/machine/azure/machine.go +++ b/mantle/platform/machine/azure/machine.go @@ -74,6 +74,10 @@ func (am *machine) SSH(cmd string) ([]byte, []byte, error) { return am.cluster.SSH(am, cmd) } +func (am *machine) IgnitionError() error { + return nil +} + // Re-fetch the Public & Private IP address for the event that it's changed during the reboot func (am *machine) refetchIPs() error { var err error diff --git a/mantle/platform/machine/do/machine.go b/mantle/platform/machine/do/machine.go index beff9df18f..24e413e609 100644 --- a/mantle/platform/machine/do/machine.go +++ b/mantle/platform/machine/do/machine.go @@ -61,6 +61,10 @@ func (dm *machine) SSH(cmd string) ([]byte, []byte, error) { return dm.cluster.SSH(dm, cmd) } +func (dm *machine) IgnitionError() error { + return nil +} + func (dm *machine) Reboot() error { return platform.RebootMachine(dm, dm.journal) } diff --git a/mantle/platform/machine/esx/machine.go b/mantle/platform/machine/esx/machine.go index 793fff998d..b6811c0adb 100644 --- a/mantle/platform/machine/esx/machine.go +++ b/mantle/platform/machine/esx/machine.go @@ -62,6 +62,10 @@ func (em *machine) SSH(cmd string) ([]byte, []byte, error) { return em.cluster.SSH(em, cmd) } +func (em *machine) IgnitionError() error { + return nil +} + func (em *machine) Reboot() error { return platform.RebootMachine(em, em.journal) } diff --git a/mantle/platform/machine/gcloud/machine.go b/mantle/platform/machine/gcloud/machine.go index d57bcc7c8f..89f32d8358 100644 --- a/mantle/platform/machine/gcloud/machine.go +++ b/mantle/platform/machine/gcloud/machine.go @@ -62,6 +62,10 @@ func (gm *machine) SSH(cmd string) ([]byte, []byte, error) { return gm.gc.SSH(gm, cmd) } +func (gm *machine) IgnitionError() error { + return nil +} + func (gm *machine) Reboot() error { return platform.RebootMachine(gm, gm.journal) } diff --git a/mantle/platform/machine/openstack/machine.go b/mantle/platform/machine/openstack/machine.go index 7d19b47062..33655251ad 100644 --- a/mantle/platform/machine/openstack/machine.go +++ b/mantle/platform/machine/openstack/machine.go @@ -83,6 +83,10 @@ func (om *machine) SSH(cmd string) ([]byte, []byte, error) { return om.cluster.SSH(om, cmd) } +func (om *machine) IgnitionError() error { + return nil +} + func (om *machine) Reboot() error { return platform.RebootMachine(om, om.journal) } diff --git a/mantle/platform/machine/packet/machine.go b/mantle/platform/machine/packet/machine.go index 10768b1300..de6bb1384f 100644 --- a/mantle/platform/machine/packet/machine.go +++ b/mantle/platform/machine/packet/machine.go @@ -61,6 +61,10 @@ func (pm *machine) SSH(cmd string) ([]byte, []byte, error) { return pm.cluster.SSH(pm, cmd) } +func (pm *machine) IgnitionError() error { + return nil +} + func (pm *machine) Reboot() error { return platform.RebootMachine(pm, pm.journal) } diff --git a/mantle/platform/machine/unprivqemu/machine.go b/mantle/platform/machine/unprivqemu/machine.go index 7fb8a9b093..faf2de2539 100644 --- a/mantle/platform/machine/unprivqemu/machine.go +++ b/mantle/platform/machine/unprivqemu/machine.go @@ -15,6 +15,7 @@ package unprivqemu import ( + "fmt" "io/ioutil" "time" @@ -61,6 +62,15 @@ func (m *machine) SSH(cmd string) ([]byte, []byte, error) { return m.qc.SSH(m, cmd) } +func (m *machine) IgnitionError() error { + _, err := m.inst.WaitIgnitionError() + if err != nil { + return err + } + // TODO render buf + return fmt.Errorf("entered emergency.target in initramfs") +} + func (m *machine) Reboot() error { return platform.RebootMachine(m, m.journal) } diff --git a/mantle/platform/platform.go b/mantle/platform/platform.go index a723d0a447..aca335fc3a 100644 --- a/mantle/platform/platform.go +++ b/mantle/platform/platform.go @@ -51,6 +51,9 @@ type Machine interface { // ID returns the plaform-specific machine identifier. ID() string + // IgnitionError returns an error if the machine failed in Ignition + IgnitionError() error + // IP returns the machine's public IP. IP() string diff --git a/mantle/platform/qemu.go b/mantle/platform/qemu.go index 6c90d3e6a9..50a2f0eb51 100644 --- a/mantle/platform/qemu.go +++ b/mantle/platform/qemu.go @@ -15,6 +15,7 @@ package platform import ( + "bufio" "encoding/json" "fmt" "io" @@ -51,6 +52,8 @@ type QemuInstance struct { tmpConfig string swtpmTmpd string swtpm exec.Cmd + + journalPipe *os.File } func (inst *QemuInstance) Pid() int { @@ -115,14 +118,73 @@ func (inst *QemuInstance) SSHAddress() (string, error) { return "", fmt.Errorf("didn't find an address") } +// Wait for the qemu process to exit func (inst *QemuInstance) Wait() error { return inst.qemu.Wait() } +// WaitIgnitionError will only return if the instance +// failed inside the initramfs. The resulting string will +// be a newline-delimited stream of JSON strings, as returned +// by `journalctl -o json`. +func (inst *QemuInstance) WaitIgnitionError() (string, error) { + b := bufio.NewReaderSize(inst.journalPipe, 64768) + var r strings.Builder + iscorrupted := false + _, err := b.Peek(1) + if err != nil { + if err == io.EOF { + return "", nil + } + return "", errors.Wrapf(err, "Reading from journal") + } + for { + line, prefix, err := b.ReadLine() + if err != nil { + return r.String(), errors.Wrapf(err, "Reading from journal channel") + } + if prefix { + iscorrupted = true + } + if len(line) == 0 || string(line) == "{}" { + break + } + r.Write(line) + } + if iscorrupted { + return r.String(), fmt.Errorf("journal was truncated due to overly long line") + } + return r.String(), nil +} + +// WaitAll wraps the process exit as well as WaitIgnitionError, +// returning an error if either fail. +func (inst *QemuInstance) WaitAll() error { + c := make(chan error) + go func() { + buf, err := inst.WaitIgnitionError() + if err != nil { + c <- err + } else { + // TODO parse buf and try to nicely render something + if buf != "" { + c <- fmt.Errorf("entered emergency.target in initramfs") + } + } + }() + go func() { + c <- inst.Wait() + }() + return <-c +} + func (inst *QemuInstance) Destroy() { if inst.tmpConfig != "" { os.Remove(inst.tmpConfig) } + if inst.journalPipe != nil { + inst.journalPipe.Close() + } if inst.qemu != nil { if err := inst.qemu.Kill(); err != nil { plog.Errorf("Error killing qemu instance %v: %v", inst.Pid(), err) @@ -728,6 +790,17 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) { "-tpmdev", "emulator,id=tpm0,chardev=chrtpm", "-device", "tpm-tis,tpmdev=tpm0") } + // Set up the virtio channel to get Ignition failures by default + journalPipeR, journalPipeW, err := os.Pipe() + if err != nil { + return nil, errors.Wrapf(err, "creating journal pipe") + } + inst.journalPipe = journalPipeR + argv = append(argv, "-device", "virtio-serial") + // https://www.redhat.com/archives/libvir-list/2015-December/msg00305.html + argv = append(argv, "-chardev", fmt.Sprintf("file,id=ignition-dracut,path=%s,append=on", builder.AddFd(journalPipeW))) + argv = append(argv, "-device", "virtserialport,chardev=ignition-dracut,name=com.coreos.ignition.journal") + fdnum := 3 // first additional file starts at position 3 for i, _ := range builder.fds { fdset := i + 1 // Start at 1 diff --git a/mantle/platform/util.go b/mantle/platform/util.go index f7c97dd272..4b48d37877 100644 --- a/mantle/platform/util.go +++ b/mantle/platform/util.go @@ -183,7 +183,19 @@ func StartMachineAfterReboot(m Machine, j *Journal, oldBootId string) error { // StartMachine will start a given machine, provided the machine's journal. func StartMachine(m Machine, j *Journal) error { - return StartMachineAfterReboot(m, j, "") + errchan := make(chan error) + go func() { + err := m.IgnitionError() + if err != nil { + plog.Infof("machine %s entered emergency.target in initramfs: %v", m.ID(), err) + errchan <- err + } + }() + go func() { + // This one ends up connecting to the journal via ssh + errchan <- StartMachineAfterReboot(m, j, "") + }() + return <-errchan } func GetMachineBootId(m Machine) (string, error) {