diff --git a/python/understack-workflows/tests/test_enroll_server.py b/python/understack-workflows/tests/test_enroll_server.py index 8e3f35306..24b3e0084 100644 --- a/python/understack-workflows/tests/test_enroll_server.py +++ b/python/understack-workflows/tests/test_enroll_server.py @@ -202,8 +202,12 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker): ] fake_ironic, created_node = make_ironic_client( node_name="Dell-ABC123", - # OOB inspect, agent inspect, OOB inspect (post-RAID). - inspect_interfaces=["idrac-redfish", "idrac-redfish", "idrac-redfish"], + inspect_interfaces=[ + "idrac-redfish", + "idrac-redfish", + "idrac-redfish", + "idrac-redfish", + ], inventory=inventory, ports=ports, ) @@ -216,7 +220,7 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker): ) bmc_set_hostname = mocker.patch.object(enroll_server, "bmc_set_hostname") update_dell_bios_settings = mocker.patch.object( - enroll_server, "update_dell_bios_settings" + enroll_server, "update_dell_bios_settings", return_value={"changed": True} ) mocker.patch( "understack_workflows.ironic.client.get_ironic_client", @@ -283,6 +287,13 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker): runbook=None, disable_ramdisk=None, ), + call( + created_node.uuid, + "clean", + cleansteps=[{"interface": "management", "step": "clear_job_queue"}], + runbook=None, + disable_ramdisk=True, + ), call( created_node.uuid, "inspect", # OOB redfish inspect for bios_name / basic info @@ -292,7 +303,14 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker): ), call( created_node.uuid, - "inspect", # agent inspect via virtual media + "inspect", # agent inspect + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + call( + created_node.uuid, + "inspect", # second agent inspect to apply BIOS changes cleansteps=None, runbook=None, disable_ramdisk=None, @@ -334,6 +352,9 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker): call(created_node.uuid, expected_ipxe_boot), call(created_node.uuid, expected_agent), call(created_node.uuid, expected_ipxe_boot), + call(created_node.uuid, expected_ipxe_boot), + call(created_node.uuid, expected_agent), + call(created_node.uuid, expected_ipxe_boot), call(created_node.uuid, expected_reset), # Post-RAID OOB inspect prep ] @@ -371,7 +392,9 @@ def test_enrol_existing_failed_node_recovers_and_updates(mocker): mocker.patch.object(enroll_server, "set_bmc_password") mocker.patch.object(enroll_server, "update_dell_drac_settings") mocker.patch.object(enroll_server, "bmc_set_hostname") - mocker.patch.object(enroll_server, "update_dell_bios_settings") + mocker.patch.object( + enroll_server, "update_dell_bios_settings", return_value={"changed": True} + ) mocker.patch( "understack_workflows.ironic.client.get_ironic_client", return_value=fake_ironic, @@ -396,6 +419,13 @@ def test_enrol_existing_failed_node_recovers_and_updates(mocker): runbook=None, disable_ramdisk=None, ), + call( + existing_node.uuid, + "clean", + cleansteps=[{"interface": "management", "step": "clear_job_queue"}], + runbook=None, + disable_ramdisk=True, + ), call( existing_node.uuid, "inspect", # OOB inspect @@ -405,7 +435,14 @@ def test_enrol_existing_failed_node_recovers_and_updates(mocker): ), call( existing_node.uuid, - "inspect", # Agent inspect via virtual media + "inspect", # Agent inspect + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + call( + existing_node.uuid, + "inspect", # second agent inspect to apply BIOS changes cleansteps=None, runbook=None, disable_ramdisk=None, diff --git a/python/understack-workflows/understack_workflows/bmc_bios.py b/python/understack-workflows/understack_workflows/bmc_bios.py index c6c20b48a..66dca8ecf 100644 --- a/python/understack-workflows/understack_workflows/bmc_bios.py +++ b/python/understack-workflows/understack_workflows/bmc_bios.py @@ -24,6 +24,9 @@ def required_bios_settings(pxe_interface: str) -> dict[str, str]: "IPMILan.1.Enable": "Disabled", # PXE is enabled by default on DELL, but we don't use it: "PxeDev1EnDis": "Disabled", + "PxeDev2EnDis": "Disabled", + "PxeDev3EnDis": "Disabled", + "PxeDev4EnDis": "Disabled", # Configure exactly one HTTP port for booting: "HttpDev1Interface": pxe_interface, "HttpDev1EnDis": "Enabled", diff --git a/python/understack-workflows/understack_workflows/ironic_node.py b/python/understack-workflows/understack_workflows/ironic_node.py index 95821e3f4..b8ce8b230 100644 --- a/python/understack-workflows/understack_workflows/ironic_node.py +++ b/python/understack-workflows/understack_workflows/ironic_node.py @@ -162,6 +162,28 @@ def create_ironic_node( return client.create_node(node_data) +def clear_pending_idrac_jobs(node: Node): + logger.info("%s performing clear_job_queue clean step", node.uuid) + transition( + node, + target_state="clean", + expected_state="manageable", + clean_steps=[{"interface": "management", "step": "clear_job_queue"}], + disable_ramdisk=True, + ) + + +def reset_idrac_to_known_good_state(node: Node): + logger.info("%s performing known_good_state clean step", node.uuid) + transition( + node, + target_state="clean", + expected_state="manageable", + clean_steps=[{"interface": "management", "step": "known_good_state"}], + disable_ramdisk=True, + ) + + def _driver_for(manufacturer: str) -> tuple[str, str]: """Answer the (driver, inspect_interface) for this server.""" if manufacturer.startswith("Dell"): diff --git a/python/understack-workflows/understack_workflows/main/enroll_server.py b/python/understack-workflows/understack_workflows/main/enroll_server.py index c9f34731d..5efc9616f 100644 --- a/python/understack-workflows/understack_workflows/main/enroll_server.py +++ b/python/understack-workflows/understack_workflows/main/enroll_server.py @@ -65,6 +65,7 @@ def main() -> None: firmware_update=args.firmware_update, raid_configure=args.raid_configure, external_cmdb_id=args.external_cmdb_id, + reset_idrac=args.reset_idrac, ) @@ -74,6 +75,7 @@ def enroll( raid_configure: bool, old_password: str | None, external_cmdb_id: str | None = None, + reset_idrac: bool = False, ) -> None: logger.info("Starting enroll workflow for bmc_ip_address=%s", ip_address) @@ -90,6 +92,13 @@ def enroll( external_cmdb_id=external_cmdb_id, ) + # Clear stale iDRAC jobs before virtual-media inspection, or optionally + # reset the controller to a broader known-good state. + if reset_idrac: + ironic_node.reset_idrac_to_known_good_state(node) + else: + ironic_node.clear_pending_idrac_jobs(node) + # Out-of-band redfish inspection populates data including baremetal ports. # # Our hooks augment the ironic baremetal port with the BMC-reported @@ -109,7 +118,6 @@ def enroll( # Therefore, we only use virtual media during our "enroll" phase, when the # port data is set up in a manner that suits the Neutron algorithm. If a # normal PXE/HTTP port is available then we use it instead: - virtual_media = not bool(ironic_node.pxe_enabled_bios_name(node)) agent_inspection(node, virtual_media=virtual_media) @@ -121,7 +129,10 @@ def enroll( ) logger.info("[node:%s] Selected PXE interface %s", node.uuid, pxe_interface) - update_dell_bios_settings(bmc, pxe_interface=pxe_interface) + # This sets the boot device to use for all future HTTP boots: + if update_dell_bios_settings(bmc, pxe_interface=pxe_interface): + logger.info("%s performing second inspection write BIOS settings", node.uuid) + agent_inspection(node) if raid_configure: configure_raid(node, bmc) @@ -273,6 +284,12 @@ def argument_parser(): default=True, help="Configure RAID before inspection", ) + parser.add_argument( + "--reset-idrac", + type=parse_bool, + default=False, + help="Reset iDRAC to known_good_state instead of clear_job_queue", + ) parser.add_argument( "--external-cmdb-id", required=False,