Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MGMT-3026 - invoke fio from installcmd #806

Merged
merged 1 commit into from
Dec 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions internal/bminventory/inventory.go
Original file line number Diff line number Diff line change
Expand Up @@ -2433,6 +2433,10 @@ func (b *bareMetalInventory) PostStepReply(ctx context.Context, params installer
func (b *bareMetalInventory) handleReplyError(params installer.PostStepReplyParams, ctx context.Context, log logrus.FieldLogger, h *models.Host) error {

if params.Reply.StepType == models.StepTypeInstall {
if params.Reply.ExitCode == host.FioPerfCheckCmdExitCode {
log.Warnf("FIO performance check: %s", params.Reply.Error)
return b.hostApi.HandlePrepareInstallationFailure(ctx, h, params.Reply.Error)
}
// Handle case of installation error due to an already running assisted-installer.
if params.Reply.ExitCode == ContainerAlreadyRunningExitCode && strings.Contains(params.Reply.Error, "the container name \"assisted-installer\" is already in use") {
log.Warnf("Install command failed due to an already running installation: %s", params.Reply.Error)
Expand Down
80 changes: 80 additions & 0 deletions internal/host/fioperfcheckcmd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package host

import (
"context"
"encoding/json"
"fmt"
"strconv"
"strings"

"github.com/openshift/assisted-service/models"
"github.com/sirupsen/logrus"
)

// FioPerfCheckCmdExitCode is the exit code sent to the fio_perf_check
// command in its request (see GetArgs). The install step reply handler
// compares a reply's exit code against this value to recognise a failed
// FIO performance check.
const FioPerfCheckCmdExitCode int64 = 222

// FioDurationThreshold is the duration threshold that installCmd passes to
// NewFioPerfCheckCmd.
// NOTE(review): the unit is not visible in this file (presumably
// milliseconds) - confirm against the fio_perf_check implementation.
const FioDurationThreshold int64 = 20

// fioPerfCheckCmd produces the podman invocation that runs the
// fio_perf_check command, either as a standalone step (GetSteps) or as a
// shell command prefix (GetCommandString).
type fioPerfCheckCmd struct {
	// baseCmd is embedded; it supplies the log field used for error reporting.
	baseCmd

	// fioPerfCheckImage is the container image handed to "podman run".
	// path is sent as the Path field of the FioPerfCheckRequest.
	fioPerfCheckImage, path string

	// durationThreshold is sent as the DurationThreshold field of the
	// FioPerfCheckRequest.
	durationThreshold int64
}

// NewFioPerfCheckCmd returns a fioPerfCheckCmd that logs through log, runs
// the fioPerfCheckImage container image, and requests a check of path with
// the given durationThreshold.
func NewFioPerfCheckCmd(log logrus.FieldLogger, fioPerfCheckImage string, path string, durationThreshold int64) *fioPerfCheckCmd {
	cmd := new(fioPerfCheckCmd)
	cmd.baseCmd = baseCmd{log: log}
	cmd.fioPerfCheckImage = fioPerfCheckImage
	cmd.path = path
	cmd.durationThreshold = durationThreshold
	return cmd
}

// GetSteps returns a single StepTypeFioPerfCheck step that runs podman with
// the arguments produced by GetArgs. The ctx and host parameters are not
// used by this implementation. If GetArgs fails, its error is returned and
// no step is produced.
func (c *fioPerfCheckCmd) GetSteps(ctx context.Context, host *models.Host) ([]*models.Step, error) {
	podmanArgs, err := c.GetArgs()
	if err != nil {
		return nil, err
	}

	return []*models.Step{{
		StepType: models.StepTypeFioPerfCheck,
		Command:  "podman",
		Args:     podmanArgs,
	}}, nil
}

// GetArgs builds the argument list for "podman" that runs fio_perf_check
// from the configured image.
//
// The container is run privileged on the host network with /dev, /var/log
// and the systemd journal socket mounted. The final argument is the
// JSON-encoded FioPerfCheckRequest (path, duration threshold and
// FioPerfCheckCmdExitCode) wrapped with strconv.Quote.
// NOTE(review): the quoting presumably keeps the JSON intact as a single
// argument once GetCommandString joins the args into a shell command line -
// confirm it is also what consumers of GetSteps expect.
//
// A marshalling failure is logged and returned with a nil slice.
func (c *fioPerfCheckCmd) GetArgs() ([]string, error) {
	// The request holds pointers, so the constant needs an addressable copy.
	exitCode := FioPerfCheckCmdExitCode
	payload, err := json.Marshal(models.FioPerfCheckRequest{
		Path:              &c.path,
		DurationThreshold: &c.durationThreshold,
		ExitCode:          &exitCode,
	})
	if err != nil {
		c.log.WithError(err).Errorf("failed to marshal FioPerfCheckRequest")
		return nil, err
	}

	podmanArgs := []string{"run", "--privileged", "--net=host", "--rm", "--quiet"}
	for _, mount := range []string{
		"/dev:/dev:rw",
		"/var/log:/var/log",
		"/run/systemd/journal/socket:/run/systemd/journal/socket",
	} {
		podmanArgs = append(podmanArgs, "-v", mount)
	}
	podmanArgs = append(podmanArgs, c.fioPerfCheckImage, "fio_perf_check", strconv.Quote(string(payload)))

	return podmanArgs, nil
}

// GetCommandString renders the check as a shell fragment of the form
// "podman <args> && ", ready to be prepended to a following command so that
// the latter only runs when the check succeeds.
//
// If GetArgs fails, an empty string is returned; GetArgs has already logged
// the error by then, and the caller ends up with no check prefix.
func (c *fioPerfCheckCmd) GetCommandString() string {
	podmanArgs, err := c.GetArgs()
	if err != nil {
		return ""
	}

	joined := strings.Join(podmanArgs, " ")
	return fmt.Sprintf("podman %s && ", joined)
}
15 changes: 15 additions & 0 deletions internal/host/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ type API interface {
RegisterHost(ctx context.Context, h *models.Host, db *gorm.DB) error
RegisterInstalledOCPHost(ctx context.Context, h *models.Host, db *gorm.DB) error
HandleInstallationFailure(ctx context.Context, h *models.Host) error
HandlePrepareInstallationFailure(ctx context.Context, h *models.Host, reason string) error
UpdateInstallProgress(ctx context.Context, h *models.Host, progress *models.HostProgress) error
RefreshStatus(ctx context.Context, h *models.Host, db *gorm.DB) error
SetBootstrap(ctx context.Context, h *models.Host, isbootstrap bool, db *gorm.DB) error
Expand Down Expand Up @@ -240,6 +241,20 @@ func (m *Manager) populateDisksEligibility(inventoryString string) (string, erro
return string(result), nil
}

func (m *Manager) HandlePrepareInstallationFailure(ctx context.Context, h *models.Host, reason string) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this the same as HandleInstallationFailure?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It adds the reason string.


lastStatusUpdateTime := h.StatusUpdatedAt
err := m.sm.Run(TransitionTypeHostInstallationFailed, newStateHost(h), &TransitionArgsHostInstallationFailed{
ctx: ctx,
reason: reason,
})
if err == nil {
m.reportInstallationMetrics(ctx, h, &models.HostProgressInfo{CurrentStage: "installation command failed",
StageStartedAt: lastStatusUpdateTime}, models.HostStageFailed)
}
return err
}

func (m *Manager) UpdateInventory(ctx context.Context, h *models.Host, inventory string) error {
hostStatus := swag.StringValue(h.Status)
allowedStatuses := append(hostStatusesBeforeInstallation[:], models.HostStatusInstallingInProgress)
Expand Down
4 changes: 2 additions & 2 deletions internal/host/installcmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,14 @@ func (i *installCmd) GetSteps(ctx context.Context, host *models.Host) ([]*models
if err = t.Execute(buf, data); err != nil {
return nil, err
}
step.Args = []string{"-c", buf.String()}

unbootableCmd, err := i.getDiskUnbootableCmd(ctx, *host)
if err != nil {
return nil, err
}

step.Args = []string{"-c", unbootableCmd + buf.String()}
fioPerfCheckCmd := NewFioPerfCheckCmd(i.log, i.instructionConfig.FioPerfCheckImage, bootdevice, FioDurationThreshold)
step.Args = []string{"-c", unbootableCmd + fioPerfCheckCmd.GetCommandString() + buf.String()}

if _, err := UpdateHost(i.log, i.db, host.ClusterID, *host.ID, *host.Status,
"installer_version", i.instructionConfig.InstallerImage, "installation_disk_path", bootdevice); err != nil {
Expand Down
8 changes: 8 additions & 0 deletions internal/host/installcmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ var DefaultInstructionConfig = InstructionConfig{
InstallerImage: "quay.io/ocpmetal/assisted-installer:latest",
ControllerImage: "quay.io/ocpmetal/assisted-installer-controller:latest",
InventoryImage: "quay.io/ocpmetal/assisted-installer-agent:latest",
FioPerfCheckImage: "quay.io/ocpmetal/assisted-installer-agent:latest",
InstallationTimeout: 120,
ReleaseImageMirror: "local.registry:5000/ocp@sha256:eab93b4591699a5a4ff50ad3517892653f04fb840127895bb3609b3cc68f98f3",
}
Expand Down Expand Up @@ -365,6 +366,13 @@ func validateInstallCommand(reply *models.Step, role models.HostRole, clusterId
"--boot-device /dev/sdb --host-id %s --openshift-version %s --mco-image mcoImage " +
"--controller-image %s --url %s --insecure=false --agent-image %s --installation-timeout %s"

fioPerfCheckCmd := "podman run --privileged --net=host --rm --quiet -v /dev:/dev:rw -v /var/log:/var/log " +
"-v /run/systemd/journal/socket:/run/systemd/journal/socket " +
"quay.io/ocpmetal/assisted-installer-agent:latest fio_perf_check " +
"\"{\\\"duration_threshold\\\":20,\\\"exit_code\\\":222,\\\"path\\\":\\\"/dev/sdb\\\"}\" && "

installCommand = fioPerfCheckCmd + installCommand

if proxy != "" {
installCommand += fmt.Sprintf(" %s", proxy)
} else if bootableDisks != nil {
Expand Down
1 change: 1 addition & 0 deletions internal/host/instructionmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type InstructionConfig struct {
DhcpLeaseAllocatorImage string `envconfig:"DHCP_LEASE_ALLOCATOR_IMAGE" default:"quay.io/ocpmetal/assisted-installer-agent:latest"`
APIVIPConnectivityCheckImage string `envconfig:"API_VIP_CONNECTIVITY_CHECK_IMAGE" default:"quay.io/ocpmetal/assisted-installer-agent:latest"`

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be added to openshift/template.yaml as well.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

NtpSynchronizerImage string `envconfig:"NTP_SYNCHRONIZER_IMAGE" default:"quay.io/ocpmetal/assisted-installer-agent:latest"`
FioPerfCheckImage string `envconfig:"FIO_PERF_CHECK_IMAGE" default:"quay.io/ocpmetal/assisted-installer-agent:latest"`
SkipCertVerification bool `envconfig:"SKIP_CERT_VERIFICATION" default:"false"`
SupportL2 bool `envconfig:"SUPPORT_L2" default:"true"`
InstallationTimeout uint `envconfig:"INSTALLATION_TIMEOUT" default:"0"`
Expand Down
17 changes: 15 additions & 2 deletions internal/host/mock_host_api.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions models/disk.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

98 changes: 98 additions & 0 deletions models/fio_perf_check_request.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions models/fio_perf_check_response.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading