Skip to content

Commit 693af13

Browse files
committed
Unmount config drives
If this seems like deja vu, that is because it is. We had this very same issue with the original CoreOS ramdisk. Since we don't control the whole OS of the ramdisk, it only made sense to teach the agent to umount the folder. The folder is referenced already, and the agent does have safeguards in place, but unfortunately this issue led to a rebuild breaking where cloud-init, glean, and the agent were all trying do the right thing as they thought, and there were just multiple /mnt/config folders present in the OS. These are separate issues we also need to try and remedy. What happens is when the device is locked via a mount, the partition table is never updated to the running OS as the mount creates a lock. So the agent ends up thinking, in the case of a rebuild, that everything including creating a configuration drive on that device has been successful, but when you reboot, there is no partition table entry for the new partition as the change was not successfully written. This state prevented the workload from rebooting properly. This change eliminates that possibility moving forward by attempting to ensure that the cloud configuration folder is no longer mounted. Depends-On: https://review.opendev.org/c/openstack/ironic/+/918118 Change-Id: I4399dd0934361003cca9ff95a7e3e3ae9bba3dab (cherry picked from commit 6ac3f35) (cherry picked from commit 0337474)
1 parent 3c61f2b commit 693af13

File tree

3 files changed

+73
-6
lines changed

3 files changed

+73
-6
lines changed

ironic_python_agent/tests/unit/test_utils.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import subprocess
2323
import tarfile
2424
import tempfile
25+
import time
2526
from unittest import mock
2627

2728
from ironic_lib import utils as ironic_utils
@@ -877,6 +878,7 @@ def test_sync_clock_ntp_server_is_none(self, mock_time_method,
877878
self.assertEqual(0, mock_execute.call_count)
878879

879880

881+
@mock.patch.object(utils, '_unmount_any_config_drives', autospec=True)
880882
@mock.patch.object(utils, '_booted_from_vmedia', autospec=True)
881883
@mock.patch.object(utils, '_check_vmedia_device', autospec=True)
882884
@mock.patch.object(utils, '_find_vmedia_device_by_labels', autospec=True)
@@ -887,7 +889,8 @@ class TestCopyConfigFromVmedia(testtools.TestCase):
887889

888890
def test_vmedia_found_not_booted_from_vmedia(
889891
self, mock_execute, mock_mount, mock_copy,
890-
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia):
892+
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia,
893+
mock_unmount_config):
891894
mock_booted_from_vmedia.return_value = False
892895
mock_find_device.return_value = '/dev/fake'
893896
utils.copy_config_from_vmedia()
@@ -896,10 +899,12 @@ def test_vmedia_found_not_booted_from_vmedia(
896899
mock_copy.assert_not_called()
897900
mock_check_vmedia.assert_not_called()
898901
self.assertTrue(mock_booted_from_vmedia.called)
902+
self.assertTrue(mock_unmount_config.called)
899903

900904
def test_no_vmedia(
901905
self, mock_execute, mock_mount, mock_copy,
902-
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia):
906+
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia,
907+
mock_unmount_config):
903908
mock_booted_from_vmedia.return_value = True
904909
mock_find_device.return_value = None
905910
utils.copy_config_from_vmedia()
@@ -908,10 +913,12 @@ def test_no_vmedia(
908913
mock_copy.assert_not_called()
909914
mock_check_vmedia.assert_not_called()
910915
self.assertFalse(mock_booted_from_vmedia.called)
916+
self.assertTrue(mock_unmount_config.called)
911917

912918
def test_no_files(
913919
self, mock_execute, mock_mount, mock_copy,
914-
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia):
920+
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia,
921+
mock_unmount_config):
915922
mock_booted_from_vmedia.return_value = True
916923
temp_path = tempfile.mkdtemp()
917924
self.addCleanup(lambda: shutil.rmtree(temp_path))
@@ -925,10 +932,12 @@ def test_no_files(
925932
'/dev/something')
926933
mock_copy.assert_not_called()
927934
self.assertTrue(mock_booted_from_vmedia.called)
935+
self.assertTrue(mock_unmount_config.called)
928936

929937
def test_mounted_no_files(
930938
self, mock_execute, mock_mount, mock_copy,
931-
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia):
939+
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia,
940+
mock_unmount_config):
932941

933942
mock_booted_from_vmedia.return_value = True
934943
mock_execute.return_value = '/some/path', ''
@@ -939,11 +948,13 @@ def test_mounted_no_files(
939948
mock_copy.assert_not_called()
940949
mock_mount.assert_not_called()
941950
self.assertTrue(mock_booted_from_vmedia.called)
951+
self.assertTrue(mock_unmount_config.called)
942952

943953
@mock.patch.object(os, 'makedirs', autospec=True)
944954
def test_copy(
945955
self, mock_makedirs, mock_execute, mock_mount, mock_copy,
946-
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia):
956+
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia,
957+
mock_unmount_config):
947958

948959
mock_booted_from_vmedia.return_value = True
949960
mock_find_device.return_value = '/dev/something'
@@ -982,12 +993,14 @@ def _fake_mount(dev):
982993
mock.call(mock.ANY, '/etc/ironic-python-agent.d/ironic.conf'),
983994
], any_order=True)
984995
self.assertTrue(mock_booted_from_vmedia.called)
996+
self.assertTrue(mock_unmount_config.called)
985997

986998
@mock.patch.object(os, 'makedirs', autospec=True)
987999
def test_copy_mounted(
9881000
self, mock_makedirs, mock_execute, mock_mount,
9891001
mock_copy, mock_find_device, mock_check_vmedia,
990-
mock_booted_from_vmedia):
1002+
mock_booted_from_vmedia,
1003+
mock_unmount_config):
9911004
mock_booted_from_vmedia.return_value = True
9921005
mock_find_device.return_value = '/dev/something'
9931006
path = tempfile.mkdtemp()
@@ -1023,6 +1036,7 @@ def test_copy_mounted(
10231036
], any_order=True)
10241037
mock_mount.assert_not_called()
10251038
self.assertTrue(mock_booted_from_vmedia.called)
1039+
self.assertTrue(mock_unmount_config.called)
10261040

10271041

10281042
@mock.patch.object(requests, 'get', autospec=True)
@@ -1143,3 +1157,20 @@ def test_early_logging_goes_to_logger(self, mock_log):
11431157
expected_calls = [mock.call('Early logging: %s', 'line 1.'),
11441158
mock.call('Early logging: %s', 'line 2 message')]
11451159
info.assert_has_calls(expected_calls)
1160+
1161+
1162+
class TestUnmountOfConfig(ironic_agent_base.IronicAgentTest):
1163+
1164+
@mock.patch.object(utils, '_early_log', autospec=True)
1165+
@mock.patch.object(os.path, 'ismount', autospec=True)
1166+
@mock.patch.object(utils, 'execute', autospec=True)
1167+
@mock.patch.object(time, 'sleep', autospec=True)
1168+
def test__unmount_any_config_drives(self, mock_sleep, mock_exec,
1169+
mock_ismount, mock_log,):
1170+
mock_ismount.side_effect = iter([True, True, False])
1171+
utils._unmount_any_config_drives()
1172+
self.assertEqual(2, mock_sleep.call_count)
1173+
self.assertEqual(2, mock_log.call_count)
1174+
mock_exec.assert_has_calls([
1175+
mock.call('umount', '/mnt/config'),
1176+
mock.call('umount', '/mnt/config')])

ironic_python_agent/utils.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,18 +311,27 @@ def copy_config_from_vmedia():
311311
['config-2', 'vmedia_boot_iso'])
312312
if not vmedia_device_file:
313313
_early_log('No virtual media device detected')
314+
_unmount_any_config_drives()
314315
return
315316
if not _booted_from_vmedia():
316317
_early_log('Cannot use configuration from virtual media as the '
317318
'agent was not booted from virtual media.')
319+
_unmount_any_config_drives()
318320
return
319321
# Determine the device
320322
mounted = _find_mount_point(vmedia_device_file)
321323
if mounted:
324+
# In this case a utility like a configuration drive tool
325+
# has *already* mounted the device we believe to be the
326+
# configuration drive.
322327
_copy_config_from(mounted)
323328
else:
324329
with ironic_utils.mounted(vmedia_device_file) as vmedia_mount_point:
330+
# In this case, we use a temporary folder and extract the contents
331+
# for our configuration.
325332
_copy_config_from(vmedia_mount_point)
333+
# As a last act, just make sure there is nothing else mounted.
334+
_unmount_any_config_drives()
326335

327336

328337
def _get_cached_params():
@@ -917,3 +926,22 @@ def rescan_device(device):
917926
except processutils.ProcessExecutionError as e:
918927
LOG.warning('Something went wrong when waiting for udev '
919928
'to settle. Error: %s', e)
929+
930+
931+
def _unmount_any_config_drives():
932+
"""Umount anything mounted to /mnt/config
933+
934+
As part of the configuration drive model, utilities like cloud-init
935+
and glean leverage a folder at /mnt/config to convey configuration
936+
to a booting OS.
937+
938+
The possibility exists that one of the utilties mounted one or multiple
939+
such folders, even if the configuration was not used, and this can
940+
result in locked devices which can prevent rebuild operations from
941+
completing successfully as as long as the folder is mounted, it is
942+
a "locked" device to the operating system.
943+
"""
944+
while os.path.ismount('/mnt/config'):
945+
_early_log('Issuing an umount command for /mnt/config...')
946+
execute('umount', '/mnt/config')
947+
time.sleep(1)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
fixes:
3+
- |
4+
Fixes an issue where configuration drive volumes which are mounted
5+
by the operating system could remain mounted and cause a lock to be
6+
held, which may conflict with actions such as ``rebuild``.
7+
The agent now always makes sure the folder used by Glean and Cloud-init
8+
is not mounted.

0 commit comments

Comments
 (0)