Skip to content

Commit

Permalink
Add preconfig step ExtraConfigMevFwUp
Browse files Browse the repository at this point in the history
Provide a pre-config to put the MeV firmware into a good state.

Reflashing the firmware takes a very long time, so by default this step
skips the reflash when the firmware is already on the desired version.

However, if force_mev_fw_up is set, the step reflashes the firmware
regardless of the current state.

Signed-off-by: Salvatore Daniele <[email protected]>
  • Loading branch information
SalDaniele committed Jan 22, 2025
1 parent 9a8167e commit 91775cf
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 8 deletions.
4 changes: 4 additions & 0 deletions clustersConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class ExtraConfigArgs:

base_image: str = ""

mev_version: str = ""

force_mev_fw_up: bool = False

def pre_check(self) -> None:
if self.sriov_network_operator_local:
if self.name != "sriov_network_operator":
Expand Down
61 changes: 61 additions & 0 deletions extraConfigMev.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from clustersConfig import ClustersConfig
import host
from logger import logger
from clustersConfig import ExtraConfigArgs
from bmc import BMC
from concurrent.futures import Future
from typing import Optional
import time

# Firmware release that ExtraConfigMevFwUp installs when the extra-config
# entry leaves mev_version empty.
LATEST_MEV_FW = "1.8.0.10052"


def ExtraConfigMevFwUp(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:
    """Pre-config step that brings the MeV firmware on an IPU's IMC to a given release.

    The step targets the first master of the cluster config. Unless
    ``cfg.force_mev_fw_up`` is set, it first reads ``/etc/issue.net`` on the
    IMC (when the IMC answers a ping) and returns early if the requested
    release string already appears there. Otherwise it flashes the firmware
    through the tools container, cold boots via the host-side BMC, and reads
    ``/etc/issue.net`` again to confirm the release.

    Args:
        cc: Cluster configuration; must have ``kind == "iso"`` and an
            ``"ipu"`` first master with ``host_side_bmc`` set.
        cfg: Extra-config arguments. An empty ``mev_version`` is replaced
            in place with ``LATEST_MEV_FW``.
        futures: Not used by this step.

    Raises:
        AssertionError: If the cluster config does not match the
            expectations listed for ``cc``.

    Failures while flashing or validating are reported through
    ``logger.error_and_exit``.
    """
    logger.info("Running pre config step to flash MeV firmware on IPU IMC")

    # This preconfig step is expected to run on an IMC only
    assert cc.kind == "iso"
    ipu_node = cc.masters[0]
    assert ipu_node.kind == "ipu"
    assert ipu_node.host_side_bmc is not None
    imc = host.Host(ipu_node.bmc)

    # Fall back to the default release when none was requested; the choice is
    # written back onto cfg so every later message and check uses it.
    if cfg.mev_version == "":
        logger.info("Desired MeV fw release not specified, will install the latest by default")
        cfg.mev_version = LATEST_MEV_FW
    logger.info(f"Will ensure {ipu_node.bmc} is on firmware version: {cfg.mev_version}")

    # Flashing is slow, so skip it when the IMC already reports the requested
    # release, unless the user explicitly forces the update.
    if not cfg.force_mev_fw_up:
        logger.info("Checking if firmware update is required")
        if imc.ping():
            imc.ssh_connect(ipu_node.bmc_user, ipu_node.bmc_password)
            current_issue = imc.run("cat /etc/issue.net")
            if cfg.mev_version in current_issue.out:
                logger.info(f"Current MeV fw version is {current_issue.out.strip()}, no need to update")
                return

    # Flash the requested release from the local host through the tools container.
    local_host = host.LocalHost()
    flash_args = f"--dpu-type ipu --imc-address {ipu_node.bmc} firmware up --version {cfg.mev_version}"
    flash_result = local_host.run_in_container(flash_args, interactive=True)
    if not flash_result.success():
        logger.error_and_exit(f"Failed to flash new firmware. Error: {flash_result.err}")

    # Cold boot through the host-side BMC to apply the change.
    BMC.from_bmc(ipu_node.host_side_bmc).cold_boot()
    # Cold boot should also reboot IMC, give time to settle before reconnecting
    time.sleep(20)

    # Access the IMC again to validate that the flash was successful.
    imc.ssh_connect(ipu_node.bmc_user, ipu_node.bmc_password)
    new_issue = imc.run("cat /etc/issue.net")
    if not (cfg.mev_version in new_issue.out and new_issue.returncode == 0):
        logger.error_and_exit(f"Mev firmware release is not the expected version: {new_issue.out}")

    logger.info("MeV firmware flash complete")
2 changes: 2 additions & 0 deletions extraConfigRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from extraConfigMicroshift import ExtraConfigMicroshift
from extraConfigRhSubscription import ExtraConfigRhSubscription
from extraConfigDpu import ExtraConfigDpu, ExtraConfigDpuHost
from extraConfigMev import ExtraConfigMevFwUp
from clustersConfig import ClustersConfig
from clustersConfig import ExtraConfigArgs
from concurrent.futures import Future
Expand Down Expand Up @@ -51,6 +52,7 @@ def __init__(self, cc: ClustersConfig):
"rh_subscription": ExtraConfigRhSubscription,
"dpu_operator_host": ExtraConfigDpuHost,
"dpu_operator_dpu": ExtraConfigDpu,
"mev_firmware_up": ExtraConfigMevFwUp,
}

def run(self, to_run: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:
Expand Down
16 changes: 8 additions & 8 deletions host.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,14 @@ def run_or_die(self, cmd: str) -> Result:
logger.debug(ret.out.strip())
return ret

def run_in_container(self, cmd: str, interactive: bool = False, verbose: bool = True, dry_run: bool = False) -> Result:
    """Run *cmd* as the arguments of the ``quay.io/bnemeth/bf`` image under ``sudo podman run``.

    Args:
        cmd: Arguments appended after the image name and the optional flags.
        interactive: When true, ``-it`` is passed to ``podman run``.
        verbose: When true, ``--verbose`` is placed before *cmd*.
        dry_run: When true, ``--dry-run`` is placed before *cmd*.

    Returns:
        Whatever ``self.run`` returns for the assembled command line.
    """
    container_name = "dpu-tools"

    # Each optional flag collapses to an empty string when disabled; its
    # surrounding spaces in the template below are kept either way.
    tty_flag = ""
    if interactive:
        tty_flag = "-it"
    verbose_flag = ""
    if verbose:
        verbose_flag = "--verbose"
    dry_run_flag = ""
    if dry_run:
        dry_run_flag = "--dry-run"

    podman_cmd = (
        f"sudo podman run {tty_flag} --rm --pull always --replace --pid host --network host --user 0 "
        f"--name {container_name} --privileged -v /dev:/dev quay.io/bnemeth/bf {verbose_flag} {dry_run_flag} {cmd}"
    )
    return self.run(podman_cmd, logging.INFO)

def close(self) -> None:
assert self._host is not None
self._host.close()
Expand Down Expand Up @@ -443,14 +451,6 @@ def cx_firmware_upgrade(self) -> Result:
logger.info("Upgrading CX firmware")
return self.run_in_container("utils cx-fwup")

def run_in_container(self, cmd: str, interactive: bool = False, verbose: bool = True, dry_run: bool = False) -> Result:
    """Run *cmd* as the arguments of the ``quay.io/bnemeth/bf`` image under ``sudo podman run``.

    Args:
        cmd: Arguments appended after the image name and the optional flags.
        interactive: When true, ``-it`` is passed to ``podman run``.
        verbose: When true, ``--verbose`` is placed before *cmd*.
        dry_run: When true, ``--dry-run`` is placed before *cmd*.

    Returns:
        Whatever ``self.run`` returns for the assembled command line.
    """
    container_name = "dpu-tools"

    # Each optional flag collapses to an empty string when disabled; its
    # surrounding spaces in the template below are kept either way.
    tty_flag = ""
    if interactive:
        tty_flag = "-it"
    verbose_flag = ""
    if verbose:
        verbose_flag = "--verbose"
    dry_run_flag = ""
    if dry_run:
        dry_run_flag = "--dry-run"

    podman_cmd = (
        f"sudo podman run {tty_flag} --rm --pull always --replace --pid host --network host --user 0 "
        f"--name {container_name} --privileged -v /dev:/dev quay.io/bnemeth/bf {verbose_flag} {dry_run_flag} {cmd}"
    )
    return self.run(podman_cmd, logging.DEBUG)


class HostWithBF2(Host):
def connect_to_bf(self, bf_addr: str) -> None:
Expand Down

0 comments on commit 91775cf

Please sign in to comment.