Skip to content

Commit faf2ee2

Browse files
juliakregerelfosardo
authored andcommitted
Multipath Hardware path handling
Removes multipath base devices from consideration by default, and instead allows the device-mapper device managed by multipath to be picked up and utilized. In effect, this allows us to ignore standby paths *and* leverage multiple concurrent IO paths if so offered via ALUA. In reality, anyone who has previously built IPA with multipath tooling might not have encountered issues because they used Active/Active SAN storage environments. They would have worked because the IO lock would have been exchanged between controllers and paths. However, Active/Passive environments will block passive paths from access, ultimately preventing new locks from being established without proper negotiation. This ultimately requires multipathing *and* the agent to be smart enough to know to disqualify underlying paths to backend storage volumes. An additional benefit of this is that active/active MPIO devices will, as long as ``multipath`` is present inside the ramdisk, no longer possibly result in duplicate IO wipes occurring across numerous devices. Story: #2010003 Task: #45108 Resolves: rhbz#2076622 Resolves: rhbz#2070519 Change-Id: I0fd6356f036d5ff17510fb838eaf418164cdfc92 (cherry picked from commit 014d377)
1 parent 345d459 commit faf2ee2

File tree

7 files changed

+722
-49
lines changed

7 files changed

+722
-49
lines changed

ironic_python_agent/hardware.py

Lines changed: 150 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@
8181
}
8282
}
8383

84+
# Tri-state flag for multipath support: None means initialization has not
# been attempted yet; evaluate_hardware_support() then stores the boolean
# result of _enable_multipath() here. Read it via get_multipath_status().
MULTIPATH_ENABLED = None
85+
8486

8587
def _get_device_info(dev, devclass, field):
8688
"""Get the device info according to device class and field."""
@@ -138,6 +140,36 @@ def _load_ipmi_modules():
138140
il_utils.try_execute('modprobe', 'ipmi_si')
139141

140142

143+
def _load_multipath_modules():
    """Prepare the system so multipath information can be collected.

    There are two ways in: distributions shipping the ``mpathconf`` helper
    (Centos/RHEL) get it invoked, but only while no ``/etc/multipath.conf``
    exists; everything else just has the kernel modules loaded, trusting
    multipathd to take care of the rest.
    """
    mpathconf = '/usr/sbin/mpathconf'
    use_mpathconf = (os.path.isfile(mpathconf)
                     and not os.path.isfile('/etc/multipath.conf'))

    if not use_mpathconf:
        # Only make sure the modules are present. No configuration file is
        # needed, the compiled-in defaults are implied.
        # NOTE(TheJulia): Debian/Ubuntu specifically just document
        # using `multipath -t` output to start a new configuration
        # file, if needed.
        il_utils.try_execute('modprobe', 'dm_multipath')
        il_utils.try_execute('modprobe', 'multipath')
        return

    # Centos/RHEL and friends: mpathconf does several things at once,
    # including generating a configuration, which is not *really*
    # required.. at least *shouldn't* be.
    # WARNING(TheJulia): This command explicitly replaces local
    # configuration.
    il_utils.try_execute(mpathconf, '--enable',
                         '--find_multipaths', 'yes',
                         '--with_module', 'y',
                         '--with_multipathd', 'y')
171+
172+
141173
def _check_for_iscsi():
142174
"""Connect iSCSI shared connected via iBFT or OF.
143175
@@ -181,6 +213,84 @@ def _get_md_uuid(raid_device):
181213
return match.group(1)
182214

183215

216+
def _enable_multipath():
    """Try to bring up multipath IO support.

    :returns: True if the modules were loaded and both the multipathd
              daemon and the multipath command used to enumerate devices
              could be called successfully; False if a tool is missing or
              a command failed.
    """
    # Commands are run in order:
    #  * multipathd - ideally it is already running, so this may be a
    #    no-op.
    #    NOTE(TheJulia): Testing locally, with a prior running multipathd,
    #    the explicit multipathd start just appears to silently exit with
    #    a result code of 0.
    #  * multipath -ll - mainly gets the system to identify/enumerate
    #    paths by combining what it can detect with what it already knows.
    #    In theory the result is logged in the IPA log should it be needed.
    startup_commands = (
        ('multipathd',),
        ('multipath', '-ll'),
    )
    try:
        _load_multipath_modules()
        for command in startup_commands:
            il_utils.execute(*command)
    except FileNotFoundError as exc:
        LOG.warning('Attempted to determine if multipath tools were present. '
                    'Not detected. Error recorded: %s', exc)
        return False
    except processutils.ProcessExecutionError as exc:
        LOG.warning('Attempted to invoke multipath utilities, but we '
                    'encountered an error: %s', exc)
        return False
    return True
return False
243+
244+
245+
def _get_multipath_parent_device(device):
246+
"""Check and return a multipath device."""
247+
if not device:
248+
# if lsblk provides invalid output, this can be None.
249+
return
250+
check_device = os.path.join('/dev', str(device))
251+
try:
252+
# Explicitly run the check as regardless of if the device is mpath or
253+
# not, multipath tools when using list always exits with a return
254+
# code of 0.
255+
utils.execute('multipath', '-c', check_device)
256+
# path check with return an exit code of 1 if you send it a multipath
257+
# device mapper device, like dm-0.
258+
# NOTE(TheJulia): -ll is supposed to load from all available
259+
# information, but may not force a rescan. It may be -f if we need
260+
# that. That being said, it has been about a decade since I was
261+
# running multipath tools on SAN connected gear, so my memory is
262+
# definitely fuzzy.
263+
out, _ = utils.execute('multipath', '-ll', check_device)
264+
except processutils.ProcessExecutionError as e:
265+
# FileNotFoundError if the utility does not exist.
266+
# -1 return code if the device is not valid.
267+
LOG.debug('Checked device %(dev)s and determined it was '
268+
'not a multipath device. %(error)s',
269+
{'dev': check_device,
270+
'error': e})
271+
return
272+
except FileNotFoundError:
273+
# This should never happen, as MULTIPATH_ENABLED would be False
274+
# before this occurs.
275+
LOG.warning('Attempted to check multipathing status, however '
276+
'the \'multipath\' binary is missing or not in the '
277+
'execution PATH.')
278+
return
279+
# Data format:
280+
# MPATHDEVICENAME dm-0 TYPE,HUMANNAME
281+
# size=56G features='1 retain_attached_hw_handler' hwhandler='0' wp=rw
282+
# `-+- policy='service-time 0' prio=1 status=active
283+
# `- 0:0:0:0 sda 8:0 active ready running
284+
try:
285+
lines = out.splitlines()
286+
mpath_device = lines[0].split(' ')[1]
287+
# give back something like dm-0 so we can log it.
288+
return mpath_device
289+
except IndexError:
290+
# We didn't get any command output, so Nope.
291+
pass
292+
293+
184294
def get_component_devices(raid_device):
185295
"""Get the component devices of a Software RAID device.
186296
@@ -371,7 +481,8 @@ def _md_scan_and_assemble():
371481
def list_all_block_devices(block_type='disk',
372482
ignore_raid=False,
373483
ignore_floppy=True,
374-
ignore_empty=True):
484+
ignore_empty=True,
485+
ignore_multipath=False):
375486
"""List all physical block devices
376487
377488
The switches we use for lsblk: P for KEY="value" output, b for size output
@@ -388,6 +499,9 @@ def list_all_block_devices(block_type='disk',
388499
:param ignore_floppy: Ignore floppy disk devices in the block device
389500
list. By default, these devices are filtered out.
390501
:param ignore_empty: Whether to ignore disks with size equal 0.
502+
:param ignore_multipath: Whether to ignore devices backing multipath
503+
devices. Default is to consider multipath
504+
devices, if possible.
391505
:return: A list of BlockDevices
392506
"""
393507

@@ -398,6 +512,8 @@ def _is_known_device(existing, new_device_name):
398512
return True
399513
return False
400514

515+
check_multipath = not ignore_multipath and get_multipath_status()
516+
401517
_udev_settle()
402518

403519
# map device names to /dev/disk/by-path symbolic links that points to it
@@ -428,7 +544,6 @@ def _is_known_device(existing, new_device_name):
428544
'-o{}'.format(','.join(columns)))[0]
429545
lines = report.splitlines()
430546
context = pyudev.Context()
431-
432547
devices = []
433548
for line in lines:
434549
device = {}
@@ -450,10 +565,25 @@ def _is_known_device(existing, new_device_name):
450565
LOG.debug('Ignoring floppy disk device: %s', line)
451566
continue
452567

568+
dev_kname = device.get('KNAME')
569+
if check_multipath:
570+
# Net effect is we ignore base devices, and their base devices
571+
# to what would be the mapped device name which would not pass the
572+
# validation, but would otherwise be match-able.
573+
mpath_parent_dev = _get_multipath_parent_device(dev_kname)
574+
if mpath_parent_dev:
575+
LOG.warning(
576+
"We have identified a multipath device %(device)s, this "
577+
"is being ignored in favor of %(mpath_device)s and its "
578+
"related child devices.",
579+
{'device': dev_kname,
580+
'mpath_device': mpath_parent_dev})
581+
continue
453582
# Search for raid in the reply type, as RAID is a
454583
# disk device, and we should honor it if is present.
455584
# Other possible type values, which we skip recording:
456585
# lvm, part, rom, loop
586+
457587
if devtype != block_type:
458588
if devtype is None or ignore_raid:
459589
LOG.debug(
@@ -462,7 +592,7 @@ def _is_known_device(existing, new_device_name):
462592
{'block_type': block_type, 'line': line})
463593
continue
464594
elif ('raid' in devtype
465-
and block_type in ['raid', 'disk']):
595+
and block_type in ['raid', 'disk', 'mpath']):
466596
LOG.debug(
467597
"TYPE detected to contain 'raid', signifying a "
468598
"RAID volume. Found: %s", line)
@@ -476,6 +606,11 @@ def _is_known_device(existing, new_device_name):
476606
LOG.debug(
477607
"TYPE detected to contain 'md', signifying a "
478608
"RAID partition. Found: %s", line)
609+
elif devtype == 'mpath' and block_type == 'disk':
610+
LOG.debug(
611+
"TYPE detected to contain 'mpath', "
612+
"signifing a device mapper multipath device. "
613+
"Found: %s", line)
479614
else:
480615
LOG.debug(
481616
"TYPE did not match. Wanted: %(block_type)s but found: "
@@ -1001,6 +1136,10 @@ def evaluate_hardware_support(self):
10011136
_check_for_iscsi()
10021137
_md_scan_and_assemble()
10031138
_load_ipmi_modules()
1139+
global MULTIPATH_ENABLED
1140+
if MULTIPATH_ENABLED is None:
1141+
MULTIPATH_ENABLED = _enable_multipath()
1142+
10041143
self.wait_for_disks()
10051144
return HardwareSupport.GENERIC
10061145

@@ -2732,3 +2871,11 @@ def deduplicate_steps(candidate_steps):
27322871
deduped_steps[manager].append(winning_step)
27332872

27342873
return deduped_steps
2874+
2875+
2876+
def get_multipath_status():
    """Report the recorded outcome of multipath initialization.

    :returns: The current value of the module-level MULTIPATH_ENABLED
              flag.
    """
    # NOTE(TheJulia): Going through this accessor gives tests one place to
    # mock; working with the global variable directly would leave us
    # racing tests endlessly.
    return MULTIPATH_ENABLED

ironic_python_agent/tests/unit/extensions/test_image.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,7 @@ def test__install_grub2_uefi(self, mock_get_part_uuid, mkdir_mock,
733733
mock_append_to_fstab.assert_called_with(self.fake_dir,
734734
self.fake_efi_system_part_uuid)
735735

736+
@mock.patch.object(hardware, 'get_multipath_status', lambda *_: False)
736737
@mock.patch.object(os.path, 'ismount', lambda *_: False)
737738
@mock.patch.object(image, '_is_bootloader_loaded', lambda *_: True)
738739
@mock.patch.object(os.path, 'exists', autospec=True)
@@ -844,6 +845,7 @@ def test__install_grub2_uefi_fstab(self, mock_get_part_uuid, mkdir_mock,
844845
uuid=self.fake_efi_system_part_uuid)
845846
self.assertFalse(mock_dispatch.called)
846847

848+
@mock.patch.object(hardware, 'get_multipath_status', lambda *_: False)
847849
@mock.patch.object(image, '_efi_boot_setup', lambda *_: False)
848850
@mock.patch.object(os.path, 'ismount', lambda *_: False)
849851
@mock.patch.object(image, '_is_bootloader_loaded', lambda *_: True)

ironic_python_agent/tests/unit/samples/hardware_samples.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,10 @@
121121
'KNAME="fd1" MODEL="magic" SIZE="4096" ROTA="1" TYPE="disk" UUID="" '
122122
'PARTUUID=""\n'
123123
'KNAME="sdf" MODEL="virtual floppy" SIZE="0" ROTA="1" TYPE="disk" UUID="" '
124-
'PARTUUID=""'
124+
'PARTUUID=""\n'
125+
'KNAME="dm-0" MODEL="NWD-BLP4-1600 " SIZE="1765517033472" '
126+
' ROTA="0" TYPE="mpath" UUID="" PARTUUID=""\n'
127+
125128
)
126129

127130
# NOTE(pas-ha) largest device is 1 byte smaller than 4GiB
@@ -160,6 +163,49 @@
160163
'PARTUUID=""'
161164
)
162165

166+
MULTIPATH_BLK_DEVICE_TEMPLATE = (
167+
'KNAME="sda" MODEL="INTEL_SSDSC2CT060A3" SIZE="60022480896" ROTA="0" '
168+
'TYPE="disk" UUID="" PARTUUID=""\n'
169+
'KNAME="sda2" MODEL="" SIZE="59162722304" ROTA="0" TYPE="part" '
170+
'UUID="f8b55d59-96c3-3982-b129-1b6b2ee8da86" '
171+
'PARTUUID="c97c8aac-7796-4433-b1fc-9b5fac43edf3"\n'
172+
'KNAME="sda3" MODEL="" SIZE="650002432" ROTA="0" TYPE="part" '
173+
'UUID="b3b03565-5f13-3c93-b2a6-6d90e25be926" '
174+
'PARTUUID="6c85beff-b2bd-4a1c-91b7-8abb5256459d"\n'
175+
'KNAME="sda1" MODEL="" SIZE="209715200" ROTA="0" TYPE="part" '
176+
'UUID="0a83355d-7500-3f5f-9abd-66f6fd03714c" '
177+
'PARTUUID="eba28b26-b76a-402c-94dd-0b66a523a485"\n'
178+
'KNAME="dm-0" MODEL="" SIZE="60022480896" ROTA="0" TYPE="mpath" '
179+
'UUID="" PARTUUID=""\n'
180+
'KNAME="dm-4" MODEL="" SIZE="650002432" ROTA="0" TYPE="part" '
181+
'UUID="b3b03565-5f13-3c93-b2a6-6d90e25be926" '
182+
'PARTUUID="6c85beff-b2bd-4a1c-91b7-8abb5256459d"\n'
183+
'KNAME="dm-2" MODEL="" SIZE="209715200" ROTA="0" TYPE="part" '
184+
'UUID="0a83355d-7500-3f5f-9abd-66f6fd03714c" '
185+
'PARTUUID="eba28b26-b76a-402c-94dd-0b66a523a485"\n'
186+
'KNAME="dm-3" MODEL="" SIZE="59162722304" ROTA="0" TYPE="part" '
187+
'UUID="f8b55d59-96c3-3982-b129-1b6b2ee8da86" '
188+
'PARTUUID="c97c8aac-7796-4433-b1fc-9b5fac43edf3"\n'
189+
'KNAME="sdb" MODEL="INTEL_SSDSC2CT060A3" SIZE="60022480896" '
190+
'ROTA="0" TYPE="disk" UUID="" PARTUUID=""\n'
191+
'KNAME="sdb2" MODEL="" SIZE="59162722304" ROTA="0" TYPE="part" '
192+
'UUID="f8b55d59-96c3-3982-b129-1b6b2ee8da86" '
193+
'PARTUUID="c97c8aac-7796-4433-b1fc-9b5fac43edf3"\n'
194+
'KNAME="sdb3" MODEL="" SIZE="650002432" ROTA="0" TYPE="part" '
195+
'UUID="b3b03565-5f13-3c93-b2a6-6d90e25be926" '
196+
'PARTUUID="6c85beff-b2bd-4a1c-91b7-8abb5256459d"\n'
197+
'KNAME="sdb1" MODEL="" SIZE="209715200" ROTA="0" TYPE="part" '
198+
'UUID="0a83355d-7500-3f5f-9abd-66f6fd03714c" '
199+
'PARTUUID="eba28b26-b76a-402c-94dd-0b66a523a485"\n'
200+
'KNAME="sdc" MODEL="ST1000DM003-1CH162" SIZE="1000204886016" '
201+
'ROTA="1" TYPE="disk" UUID="" PARTUUID=""\n'
202+
'KNAME="sdc1" MODEL="" SIZE="899999072256" ROTA="1" TYPE="part" '
203+
'UUID="457f7d3c-9376-4997-89bd-d1a7c8b04060" '
204+
'PARTUUID="c9433d2e-3bbc-47b4-92bf-43c1d80f06e0"\n'
205+
'KNAME="dm-1" MODEL="" SIZE="1000204886016" ROTA="0" TYPE="mpath" '
206+
'UUID="" PARTUUID=""\n'
207+
)
208+
163209
PARTUUID_DEVICE_TEMPLATE = (
164210
'KNAME="sda" MODEL="DRIVE 0" SIZE="1765517033472" '
165211
'ROTA="1" TYPE="disk" UUID="" PARTUUID=""\n'
@@ -1501,7 +1547,6 @@
15011547
}
15021548
""")
15031549

1504-
15051550
SGDISK_INFO_TEMPLATE = ("""
15061551
Partition GUID code: C12A7328-F81F-11D2-BA4B-00A0C93EC93B (EFI system partition)
15071552
Partition unique GUID: FAED7408-6D92-4FC6-883B-9069E2274ECA
@@ -1511,3 +1556,13 @@
15111556
Attribute flags: 0000000000000000
15121557
Partition name: 'EFI System Partition'
15131558
""") # noqa
1559+
1560+
MULTIPATH_VALID_PATH = '%s is a valid multipath device path'
1561+
MULTIPATH_INVALID_PATH = '%s is not a valid multipath device path'
1562+
1563+
MULTIPATH_LINKS_DM = (
1564+
'SUPER_FRIENDLY_NAME %s ATA,INTEL SSDSC2CT06\n'
1565+
'size=56G features=\'1 retain_attached_hw_handler\' hwhandler=\'0\' wp=rw\n' # noqa
1566+
' `-+- policy=\'service-time 0\' prio=1 status=active\n'
1567+
' `- 0:0:0:0 device s 8:0 active ready running\n'
1568+
)

0 commit comments

Comments
 (0)