Skip to content

Commit 668cd21

Browse files
juliakreger authored and iurygregory committed
Multipath Hardware path handling
Removes multipath base devices from consideration by default, and instead allows the device-mapper device managed by multipath to be picked up and utilized. In effect, this allows us to ignore standby paths *and* leverage multiple concurrent IO paths if so offered via ALUA. In reality, anyone who has previously built IPA with multipath tooling might not have encountered issues because they used Active/Active SAN storage environments. They would have worked because the IO lock would have been exchanged between controllers and paths. However, Active/Passive environments will block passive paths from access, ultimately preventing new locks from being established without proper negotiation. This ultimately requires multipathing *and* the agent to be smart enough to know to disqualify underlying paths to backend storage volumes. An additional benefit of this is that active/active MPIO devices will, as long as ``multipath`` is present inside the ramdisk, no longer possibly result in duplicate IO wipes occurring across numerous devices. Story: #2010003 Task: #45108 Resolves: rhbz#2076622 Resolves: rhbz#2070519 Change-Id: I0fd6356f036d5ff17510fb838eaf418164cdfc92
1 parent 12e0369 commit 668cd21

File tree

8 files changed

+740
-56
lines changed

8 files changed

+740
-56
lines changed

ironic_python_agent/hardware.py

Lines changed: 149 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@
8080
}
8181
}
8282

83+
# Module-level multipath state. Starts as None (initialization not yet
# attempted); evaluate_hardware_support() replaces it with the boolean
# result of _enable_multipath(). Readers should go through
# get_multipath_status() rather than touching this name directly.
MULTIPATH_ENABLED = None
84+
8385

8486
def _get_device_info(dev, devclass, field):
8587
"""Get the device info according to device class and field."""
@@ -126,6 +128,35 @@ def _udev_settle():
126128
return
127129

128130

131+
def _load_multipath_modules():
    """Prepare the system so multipath information can be collected.

    Two strategies are used. When the ``mpathconf`` helper is installed
    (CentOS/RHEL style systems) and no ``/etc/multipath.conf`` exists yet,
    the helper is asked to enable multipath. Otherwise the kernel modules
    are loaded directly and multipathd is trusted to do the rest.
    """
    mpathconf = '/usr/sbin/mpathconf'
    if (not os.path.isfile(mpathconf)
            or os.path.isfile('/etc/multipath.conf')):
        # No helper available, or a configuration is already in place:
        # just make sure the modules are loaded. Configuration is not
        # required and is implied by the compiled in defaults.
        # NOTE(TheJulia): Debian/Ubuntu specifically just document
        # using `multipath -t` output to start a new configuration
        # file, if needed.
        for module in ('dm_multipath', 'multipath'):
            il_utils.try_execute('modprobe', module)
        return

    # For CentOS/RHEL/etc. which ship mpathconf. The helper does a couple
    # of different things, including configuration generation, which is
    # not *really* required.. at least *shouldn't* be.
    # WARNING(TheJulia): This command explicitly replaces local
    # configuration.
    il_utils.try_execute(mpathconf, '--enable',
                         '--find_multipaths', 'yes',
                         '--with_module', 'y',
                         '--with_multipathd', 'y')
158+
159+
129160
def _check_for_iscsi():
130161
"""Connect iSCSI shared connected via iBFT or OF.
131162
@@ -169,6 +200,84 @@ def _get_md_uuid(raid_device):
169200
return match.group(1)
170201

171202

203+
def _enable_multipath():
    """Attempt to bring up multipath IO support.

    :returns: True if the multipathd daemon and the multipath command used
              to enumerate devices could both be invoked successfully,
              otherwise False.
    """
    try:
        _load_multipath_modules()
        # Starting the daemon might not work, ideally it *should* already
        # be running...
        # NOTE(TheJulia): Testing locally, with a prior running multipathd,
        # the explicit multipathd start just appears to silently exit with
        # a result code of 0.
        utils.execute('multipathd')
        # Listing is mainly to get the system to actually identify and
        # enumerate paths by combining what it can detect and what it
        # already knows. This may be useful, and in theory the output
        # should be logged in the IPA log should it be needed.
        utils.execute('multipath', '-ll')
    except FileNotFoundError as exc:
        # One of the multipath binaries is not installed.
        LOG.warning('Attempted to determine if multipath tools were present. '
                    'Not detected. Error recorded: %s', exc)
        enabled = False
    except processutils.ProcessExecutionError as exc:
        # The tooling exists but exited with an error.
        LOG.warning('Attempted to invoke multipath utilities, but we '
                    'encountered an error: %s', exc)
        enabled = False
    else:
        enabled = True
    return enabled
230+
231+
232+
def _get_multipath_parent_device(device):
233+
"""Check and return a multipath device."""
234+
if not device:
235+
# if lsblk provides invalid output, this can be None.
236+
return
237+
check_device = os.path.join('/dev', str(device))
238+
try:
239+
# Explicitly run the check as regardless of if the device is mpath or
240+
# not, multipath tools when using list always exits with a return
241+
# code of 0.
242+
utils.execute('multipath', '-c', check_device)
243+
# path check with return an exit code of 1 if you send it a multipath
244+
# device mapper device, like dm-0.
245+
# NOTE(TheJulia): -ll is supposed to load from all available
246+
# information, but may not force a rescan. It may be -f if we need
247+
# that. That being said, it has been about a decade since I was
248+
# running multipath tools on SAN connected gear, so my memory is
249+
# definitely fuzzy.
250+
out, _ = utils.execute('multipath', '-ll', check_device)
251+
except processutils.ProcessExecutionError as e:
252+
# FileNotFoundError if the utility does not exist.
253+
# -1 return code if the device is not valid.
254+
LOG.debug('Checked device %(dev)s and determined it was '
255+
'not a multipath device. %(error)s',
256+
{'dev': check_device,
257+
'error': e})
258+
return
259+
except FileNotFoundError:
260+
# This should never happen, as MULTIPATH_ENABLED would be False
261+
# before this occurs.
262+
LOG.warning('Attempted to check multipathing status, however '
263+
'the \'multipath\' binary is missing or not in the '
264+
'execution PATH.')
265+
return
266+
# Data format:
267+
# MPATHDEVICENAME dm-0 TYPE,HUMANNAME
268+
# size=56G features='1 retain_attached_hw_handler' hwhandler='0' wp=rw
269+
# `-+- policy='service-time 0' prio=1 status=active
270+
# `- 0:0:0:0 sda 8:0 active ready running
271+
try:
272+
lines = out.splitlines()
273+
mpath_device = lines[0].split(' ')[1]
274+
# give back something like dm-0 so we can log it.
275+
return mpath_device
276+
except IndexError:
277+
# We didn't get any command output, so Nope.
278+
pass
279+
280+
172281
def _get_component_devices(raid_device):
173282
"""Get the component devices of a Software RAID device.
174283
@@ -359,7 +468,8 @@ def _md_scan_and_assemble():
359468
def list_all_block_devices(block_type='disk',
360469
ignore_raid=False,
361470
ignore_floppy=True,
362-
ignore_empty=True):
471+
ignore_empty=True,
472+
ignore_multipath=False):
363473
"""List all physical block devices
364474
365475
The switches we use for lsblk: P for KEY="value" output, b for size output
@@ -376,6 +486,9 @@ def list_all_block_devices(block_type='disk',
376486
:param ignore_floppy: Ignore floppy disk devices in the block device
377487
list. By default, these devices are filtered out.
378488
:param ignore_empty: Whether to ignore disks with size equal 0.
489+
:param ignore_multipath: Whether to ignore devices backing multipath
490+
devices. Default is to consider multipath
491+
devices, if possible.
379492
:return: A list of BlockDevices
380493
"""
381494

@@ -386,6 +499,8 @@ def _is_known_device(existing, new_device_name):
386499
return True
387500
return False
388501

502+
check_multipath = not ignore_multipath and get_multipath_status()
503+
389504
_udev_settle()
390505

391506
# map device names to /dev/disk/by-path symbolic links that points to it
@@ -416,7 +531,6 @@ def _is_known_device(existing, new_device_name):
416531
'-o{}'.format(','.join(columns)))[0]
417532
lines = report.splitlines()
418533
context = pyudev.Context()
419-
420534
devices = []
421535
for line in lines:
422536
device = {}
@@ -438,16 +552,31 @@ def _is_known_device(existing, new_device_name):
438552
LOG.debug('Ignoring floppy disk device %s', device)
439553
continue
440554

555+
dev_kname = device.get('KNAME')
556+
if check_multipath:
557+
# Net effect is we ignore base devices, and their base devices
558+
# to what would be the mapped device name which would not pass the
559+
# validation, but would otherwise be match-able.
560+
mpath_parent_dev = _get_multipath_parent_device(dev_kname)
561+
if mpath_parent_dev:
562+
LOG.warning(
563+
"We have identified a multipath device %(device)s, this "
564+
"is being ignored in favor of %(mpath_device)s and its "
565+
"related child devices.",
566+
{'device': dev_kname,
567+
'mpath_device': mpath_parent_dev})
568+
continue
441569
# Search for raid in the reply type, as RAID is a
442570
# disk device, and we should honor it if is present.
443571
# Other possible type values, which we skip recording:
444572
# lvm, part, rom, loop
573+
445574
if devtype != block_type:
446575
if devtype is None or ignore_raid:
447576
LOG.debug("Skipping: {!r}".format(line))
448577
continue
449578
elif ('raid' in devtype
450-
and block_type in ['raid', 'disk']):
579+
and block_type in ['raid', 'disk', 'mpath']):
451580
LOG.debug(
452581
"TYPE detected to contain 'raid', signifying a "
453582
"RAID volume. Found: {!r}".format(line))
@@ -461,6 +590,11 @@ def _is_known_device(existing, new_device_name):
461590
LOG.debug(
462591
"TYPE detected to contain 'md', signifying a "
463592
"RAID partition. Found: {!r}".format(line))
593+
elif devtype == 'mpath' and block_type == 'disk':
594+
LOG.debug(
595+
"TYPE detected to contain 'mpath', "
596+
"signifing a device mapper multipath device. "
597+
"Found: %s", line)
464598
else:
465599
LOG.debug(
466600
"TYPE did not match. Wanted: {!r} but found: {!r}".format(
@@ -974,6 +1108,10 @@ def evaluate_hardware_support(self):
9741108
# Do some initialization before we declare ourself ready
9751109
_check_for_iscsi()
9761110
_md_scan_and_assemble()
1111+
global MULTIPATH_ENABLED
1112+
if MULTIPATH_ENABLED is None:
1113+
MULTIPATH_ENABLED = _enable_multipath()
1114+
9771115
self.wait_for_disks()
9781116
return HardwareSupport.GENERIC
9791117

@@ -2609,3 +2747,11 @@ def deduplicate_steps(candidate_steps):
26092747
deduped_steps[manager].append(winning_step)
26102748

26112749
return deduped_steps
2750+
2751+
2752+
def get_multipath_status():
    """Report the outcome of multipath initialization.

    :returns: The current value of the module-level ``MULTIPATH_ENABLED``
              flag.
    """
    # NOTE(TheJulia): This accessor provides a nice place to mock out and
    # simplify testing; if tests worked with the global variable directly
    # they would be racing each other endlessly.
    status = MULTIPATH_ENABLED
    return status

ironic_python_agent/tests/unit/extensions/test_image.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,7 @@ def test__install_grub2_uefi(self, mock_get_part_uuid, mkdir_mock,
787787
mock_append_to_fstab.assert_called_with(self.fake_dir,
788788
self.fake_efi_system_part_uuid)
789789

790+
@mock.patch.object(hardware, 'get_multipath_status', lambda *_: False)
790791
@mock.patch.object(os.path, 'ismount', lambda *_: False)
791792
@mock.patch.object(image, '_is_bootloader_loaded', lambda *_: True)
792793
@mock.patch.object(os.path, 'exists', autospec=True)
@@ -898,6 +899,7 @@ def test__install_grub2_uefi_fstab(self, mock_get_part_uuid, mkdir_mock,
898899
uuid=self.fake_efi_system_part_uuid)
899900
self.assertFalse(mock_dispatch.called)
900901

902+
@mock.patch.object(hardware, 'get_multipath_status', lambda *_: False)
901903
@mock.patch.object(image, '_efi_boot_setup', lambda *_: False)
902904
@mock.patch.object(os.path, 'ismount', lambda *_: False)
903905
@mock.patch.object(image, '_is_bootloader_loaded', lambda *_: True)

ironic_python_agent/tests/unit/samples/hardware_samples.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,10 @@
121121
'KNAME="fd1" MODEL="magic" SIZE="4096" ROTA="1" TYPE="disk" UUID="" '
122122
'PARTUUID=""\n'
123123
'KNAME="sdf" MODEL="virtual floppy" SIZE="0" ROTA="1" TYPE="disk" UUID="" '
124-
'PARTUUID=""'
124+
'PARTUUID=""\n'
125+
'KNAME="dm-0" MODEL="NWD-BLP4-1600 " SIZE="1765517033472" '
126+
' ROTA="0" TYPE="mpath" UUID="" PARTUUID=""\n'
127+
125128
)
126129

127130
# NOTE(pas-ha) largest device is 1 byte smaller than 4GiB
@@ -160,6 +163,49 @@
160163
'PARTUUID=""'
161164
)
162165

166+
# Sample block device listing in KEY="value" form, one device per line.
# It contains the disks sda, sdb and sdc with their partitions, the
# TYPE="mpath" devices dm-0 and dm-1, and the TYPE="part" devices dm-2,
# dm-3 and dm-4 whose UUID/PARTUUID values repeat those of the sda/sdb
# partitions.
# NOTE(review): presumably sda and sdb model two paths to dm-0 (sizes
# match) and sdc the single path behind dm-1 -- confirm against the unit
# tests which consume this sample.
MULTIPATH_BLK_DEVICE_TEMPLATE = (
    'KNAME="sda" MODEL="INTEL_SSDSC2CT060A3" SIZE="60022480896" ROTA="0" '
    'TYPE="disk" UUID="" PARTUUID=""\n'
    'KNAME="sda2" MODEL="" SIZE="59162722304" ROTA="0" TYPE="part" '
    'UUID="f8b55d59-96c3-3982-b129-1b6b2ee8da86" '
    'PARTUUID="c97c8aac-7796-4433-b1fc-9b5fac43edf3"\n'
    'KNAME="sda3" MODEL="" SIZE="650002432" ROTA="0" TYPE="part" '
    'UUID="b3b03565-5f13-3c93-b2a6-6d90e25be926" '
    'PARTUUID="6c85beff-b2bd-4a1c-91b7-8abb5256459d"\n'
    'KNAME="sda1" MODEL="" SIZE="209715200" ROTA="0" TYPE="part" '
    'UUID="0a83355d-7500-3f5f-9abd-66f6fd03714c" '
    'PARTUUID="eba28b26-b76a-402c-94dd-0b66a523a485"\n'
    'KNAME="dm-0" MODEL="" SIZE="60022480896" ROTA="0" TYPE="mpath" '
    'UUID="" PARTUUID=""\n'
    'KNAME="dm-4" MODEL="" SIZE="650002432" ROTA="0" TYPE="part" '
    'UUID="b3b03565-5f13-3c93-b2a6-6d90e25be926" '
    'PARTUUID="6c85beff-b2bd-4a1c-91b7-8abb5256459d"\n'
    'KNAME="dm-2" MODEL="" SIZE="209715200" ROTA="0" TYPE="part" '
    'UUID="0a83355d-7500-3f5f-9abd-66f6fd03714c" '
    'PARTUUID="eba28b26-b76a-402c-94dd-0b66a523a485"\n'
    'KNAME="dm-3" MODEL="" SIZE="59162722304" ROTA="0" TYPE="part" '
    'UUID="f8b55d59-96c3-3982-b129-1b6b2ee8da86" '
    'PARTUUID="c97c8aac-7796-4433-b1fc-9b5fac43edf3"\n'
    'KNAME="sdb" MODEL="INTEL_SSDSC2CT060A3" SIZE="60022480896" '
    'ROTA="0" TYPE="disk" UUID="" PARTUUID=""\n'
    'KNAME="sdb2" MODEL="" SIZE="59162722304" ROTA="0" TYPE="part" '
    'UUID="f8b55d59-96c3-3982-b129-1b6b2ee8da86" '
    'PARTUUID="c97c8aac-7796-4433-b1fc-9b5fac43edf3"\n'
    'KNAME="sdb3" MODEL="" SIZE="650002432" ROTA="0" TYPE="part" '
    'UUID="b3b03565-5f13-3c93-b2a6-6d90e25be926" '
    'PARTUUID="6c85beff-b2bd-4a1c-91b7-8abb5256459d"\n'
    'KNAME="sdb1" MODEL="" SIZE="209715200" ROTA="0" TYPE="part" '
    'UUID="0a83355d-7500-3f5f-9abd-66f6fd03714c" '
    'PARTUUID="eba28b26-b76a-402c-94dd-0b66a523a485"\n'
    'KNAME="sdc" MODEL="ST1000DM003-1CH162" SIZE="1000204886016" '
    'ROTA="1" TYPE="disk" UUID="" PARTUUID=""\n'
    'KNAME="sdc1" MODEL="" SIZE="899999072256" ROTA="1" TYPE="part" '
    'UUID="457f7d3c-9376-4997-89bd-d1a7c8b04060" '
    'PARTUUID="c9433d2e-3bbc-47b4-92bf-43c1d80f06e0"\n'
    'KNAME="dm-1" MODEL="" SIZE="1000204886016" ROTA="0" TYPE="mpath" '
    'UUID="" PARTUUID=""\n'
)
208+
163209
PARTUUID_DEVICE_TEMPLATE = (
164210
'KNAME="sda" MODEL="DRIVE 0" SIZE="1765517033472" '
165211
'ROTA="1" TYPE="disk" UUID="" PARTUUID=""\n'
@@ -1501,7 +1547,6 @@
15011547
}
15021548
""")
15031549

1504-
15051550
SGDISK_INFO_TEMPLATE = ("""
15061551
Partition GUID code: C12A7328-F81F-11D2-BA4B-00A0C93EC93B (EFI system partition)
15071552
Partition unique GUID: FAED7408-6D92-4FC6-883B-9069E2274ECA
@@ -1511,3 +1556,13 @@
15111556
Attribute flags: 0000000000000000
15121557
Partition name: 'EFI System Partition'
15131558
""") # noqa
1559+
1560+
# Message templates for a multipath path check; ``%s`` is the device path.
# NOTE(review): presumably these mirror ``multipath -c`` output -- confirm.
MULTIPATH_VALID_PATH = "%s is a valid multipath device path"
MULTIPATH_INVALID_PATH = "%s is not a valid multipath device path"

# Sample multipath topology listing for a single map; ``%s`` is the device
# mapper name (second field of the first line).
MULTIPATH_LINKS_DM = ''.join((
    "SUPER_FRIENDLY_NAME %s ATA,INTEL SSDSC2CT06\n",
    "size=56G features='1 retain_attached_hw_handler' hwhandler='0' wp=rw\n",
    " `-+- policy='service-time 0' prio=1 status=active\n",
    " `- 0:0:0:0 device s 8:0 active ready running\n",
))

0 commit comments

Comments
 (0)