diff options
author | Chad Smith <chad.smith@canonical.com> | 2021-02-19 15:37:57 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-19 15:37:57 -0700 |
commit | 66e2d42dd1b722dc8e59f4e5990cea54f81ccd2a (patch) | |
tree | da3d84ce220872f47c42210bb59a2fce58883cb8 | |
parent | 08d8902a95407d1f313ba1c679145d5f6b0df455 (diff) | |
download | vyos-cloud-init-66e2d42dd1b722dc8e59f4e5990cea54f81ccd2a.tar.gz vyos-cloud-init-66e2d42dd1b722dc8e59f4e5990cea54f81ccd2a.zip |
azure: case-insensitive UUID to avoid new IID during kernel upgrade (#798)
Kernel's newer than 4.15 present /sys/dmi/id/product_uuid as a
lowercase value. Previously UUID was uppercase.
Azure datasource reads the product_uuid directly as their platform's
instance-id. This presents a problem if a kernel is either
upgraded or downgraded across the 4.15 kernel version boundary because
the case of the UUID will change, resulting in cloud-init seeing a
"new" instance id and re-running all modules.
Re-running cc_ssh in cloud-init deletes and regenerates ssh_host keys
on a system which can cause concern on long-running instances that
somethingnefarious has happened.
Also add:
- An integration test for this for Azure Bionic Ubuntu FIPS upgrading from
a FIPS kernel with uppercase UUID to a lowercase UUID in linux-azure
- A new pytest.mark.sru_next to collect all integration tests related to our
next SRU
LP: #1835584
-rwxr-xr-x | cloudinit/sources/DataSourceAzure.py | 12 | ||||
-rw-r--r-- | tests/integration_tests/bugs/test_lp1835584.py | 104 | ||||
-rw-r--r-- | tests/integration_tests/instances.py | 6 | ||||
-rw-r--r-- | tests/unittests/test_datasource/test_azure.py | 39 | ||||
-rw-r--r-- | tox.ini | 1 |
5 files changed, 153 insertions, 9 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 090dd66b..748a9716 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -683,10 +683,18 @@ class DataSourceAzure(sources.DataSource): def _iid(self, previous=None): prev_iid_path = os.path.join( self.paths.get_cpath('data'), 'instance-id') - iid = dmi.read_dmi_data('system-uuid') + # Older kernels than 4.15 will have UPPERCASE product_uuid. + # We don't want Azure to react to an UPPER/lower difference as a new + # instance id as it rewrites SSH host keys. + # LP: #1835584 + iid = dmi.read_dmi_data('system-uuid').lower() if os.path.exists(prev_iid_path): previous = util.load_file(prev_iid_path).strip() - if is_byte_swapped(previous, iid): + if previous.lower() == iid: + # If uppercase/lowercase equivalent, return the previous value + # to avoid new instance id. + return previous + if is_byte_swapped(previous.lower(), iid): return previous return iid diff --git a/tests/integration_tests/bugs/test_lp1835584.py b/tests/integration_tests/bugs/test_lp1835584.py new file mode 100644 index 00000000..660d2a2a --- /dev/null +++ b/tests/integration_tests/bugs/test_lp1835584.py @@ -0,0 +1,104 @@ +""" Integration test for LP #1835584 + +Upstream linux kernels prior to 4.15 providate DMI product_uuid in uppercase. +More recent kernels switched to lowercase for DMI product_uuid. Azure +datasource uses this product_uuid as the instance-id for cloud-init. + +The linux-azure-fips kernel installed in PRO FIPs images, that product UUID is +uppercase whereas the linux-azure cloud-optimized kernel reports the UUID as +lowercase. + +In cases where product_uuid changes case, ensure cloud-init doesn't +recreate ssh hostkeys across reboot (due to detecting an instance_id change). + +This currently only affects linux-azure-fips -> linux-azure on Bionic. +This test won't run on Xenial because both linux-azure-fips and linux-azure +report uppercase product_uuids. + +The test will launch a specific Bionic Ubuntu PRO FIPS image which has a +linux-azure-fips kernel known to report product_uuid as uppercase. Then upgrade +and reboot into linux-azure kernel which is known to report product_uuid as +lowercase. + +Across the reboot, assert that we didn't re-run config_ssh by virtue of +seeing only one semaphore creation log entry of type: + + Writing to /var/lib/cloud/instances/<UUID>/sem/config_ssh - + +https://bugs.launchpad.net/cloud-init/+bug/1835584 +""" +import re + +import pytest + +from tests.integration_tests.instances import IntegrationAzureInstance +from tests.integration_tests.clouds import ( + ImageSpecification, IntegrationCloud +) +from tests.integration_tests.conftest import get_validated_source + + +IMG_AZURE_UBUNTU_PRO_FIPS_BIONIC = ( + "Canonical:0001-com-ubuntu-pro-bionic-fips:pro-fips-18_04:18.04.202010201" +) + + +def _check_iid_insensitive_across_kernel_upgrade( + instance: IntegrationAzureInstance +): + uuid = instance.read_from_file("/sys/class/dmi/id/product_uuid") + assert uuid.isupper(), ( + "Expected uppercase UUID on Ubuntu FIPS image {}".format( + uuid + ) + ) + orig_kernel = instance.execute("uname -r").strip() + assert "azure-fips" in orig_kernel + result = instance.execute("apt-get update") + # Install a 5.4+ kernel which provides lowercase product_uuid + result = instance.execute("apt-get install linux-azure --assume-yes") + if not result.ok: + pytest.fail("Unable to install linux-azure kernel: {}".format(result)) + instance.restart() + new_kernel = instance.execute("uname -r").strip() + assert orig_kernel != new_kernel + assert "azure-fips" not in new_kernel + assert "azure" in new_kernel + new_uuid = instance.read_from_file("/sys/class/dmi/id/product_uuid") + assert ( + uuid.lower() == new_uuid + ), "Expected UUID on linux-azure to be lowercase of FIPS: {}".format(uuid) + log = instance.read_from_file("/var/log/cloud-init.log") + RE_CONFIG_SSH_SEMAPHORE = r"Writing.*sem/config_ssh " + ssh_runs = len(re.findall(RE_CONFIG_SSH_SEMAPHORE, log)) + assert 1 == ssh_runs, "config_ssh ran too many times {}".format(ssh_runs) + + +@pytest.mark.azure +@pytest.mark.sru_next +def test_azure_kernel_upgrade_case_insensitive_uuid( + session_cloud: IntegrationCloud +): + cfg_image_spec = ImageSpecification.from_os_image() + if (cfg_image_spec.os, cfg_image_spec.release) != ("ubuntu", "bionic"): + pytest.skip( + "Test only supports ubuntu:bionic not {0.os}:{0.release}".format( + cfg_image_spec + ) + ) + source = get_validated_source(session_cloud) + if not source.installs_new_version(): + pytest.skip( + "Provide CLOUD_INIT_SOURCE to install expected working cloud-init" + ) + image_id = IMG_AZURE_UBUNTU_PRO_FIPS_BIONIC + with session_cloud.launch( + launch_kwargs={"image_id": image_id} + ) as instance: + # We can't use setup_image fixture here because we want to avoid + # taking a snapshot or cleaning the booted machine after cloud-init + # upgrade. + instance.install_new_cloud_init( + source, take_snapshot=False, clean=False + ) + _check_iid_insensitive_across_kernel_upgrade(instance) diff --git a/tests/integration_tests/instances.py b/tests/integration_tests/instances.py index 0d9852c3..055ec758 100644 --- a/tests/integration_tests/instances.py +++ b/tests/integration_tests/instances.py @@ -116,7 +116,8 @@ class IntegrationInstance: def install_new_cloud_init( self, source: CloudInitSource, - take_snapshot=True + take_snapshot=True, + clean=True, ): if source == CloudInitSource.DEB_PACKAGE: self.install_deb() @@ -133,7 +134,8 @@ class IntegrationInstance: ) version = self.execute('cloud-init -v').split()[-1] log.info('Installed cloud-init version: %s', version) - self.instance.clean() + if clean: + self.instance.clean() if take_snapshot: snapshot_id = self.snapshot() self.cloud.snapshot_id = snapshot_id diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index dc615309..152a2e1a 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -201,6 +201,7 @@ IMDS_NETWORK_METADATA = { } MOCKPATH = 'cloudinit.sources.DataSourceAzure.' +EXAMPLE_UUID = 'd0df4c54-4ecb-4a4b-9954-5bdf3ed5c3b8' class TestParseNetworkConfig(CiTestCase): @@ -630,7 +631,7 @@ scbus-1 on xpt0 bus 0 return dsaz def _get_ds(self, data, agent_command=None, distro='ubuntu', - apply_network=None): + apply_network=None, instance_id=None): def dsdevs(): return data.get('dsdevs', []) @@ -659,7 +660,10 @@ scbus-1 on xpt0 bus 0 self.m_ephemeral_dhcpv4 = mock.MagicMock() self.m_ephemeral_dhcpv4_with_reporting = mock.MagicMock() - self.instance_id = 'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8' + if instance_id: + self.instance_id = instance_id + else: + self.instance_id = EXAMPLE_UUID def _dmi_mocks(key): if key == 'system-uuid': @@ -910,7 +914,7 @@ scbus-1 on xpt0 bus 0 'azure_data': { 'configurationsettype': 'LinuxProvisioningConfiguration'}, 'imds': NETWORK_METADATA, - 'instance-id': 'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8', + 'instance-id': EXAMPLE_UUID, 'local-hostname': u'myhost', 'random_seed': 'wild'} @@ -1613,6 +1617,32 @@ scbus-1 on xpt0 bus 0 self.assertTrue(ret) self.assertEqual('value', dsrc.metadata['test']) + def test_instance_id_case_insensitive(self): + """Return the previous iid when current is a case-insensitive match.""" + lower_iid = EXAMPLE_UUID.lower() + upper_iid = EXAMPLE_UUID.upper() + # lowercase current UUID + ds = self._get_ds( + {'ovfcontent': construct_valid_ovf_env()}, instance_id=lower_iid + ) + # UPPERCASE previous + write_file( + os.path.join(self.paths.cloud_dir, 'data', 'instance-id'), + upper_iid) + ds.get_data() + self.assertEqual(upper_iid, ds.metadata['instance-id']) + + # UPPERCASE current UUID + ds = self._get_ds( + {'ovfcontent': construct_valid_ovf_env()}, instance_id=upper_iid + ) + # lowercase previous + write_file( + os.path.join(self.paths.cloud_dir, 'data', 'instance-id'), + lower_iid) + ds.get_data() + self.assertEqual(lower_iid, ds.metadata['instance-id']) + def test_instance_id_endianness(self): """Return the previous iid when dmi uuid is the byteswapped iid.""" ds = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) @@ -1628,8 +1658,7 @@ scbus-1 on xpt0 bus 0 os.path.join(self.paths.cloud_dir, 'data', 'instance-id'), '644CDFD0-CB4E-4B4A-9954-5BDF3ED5C3B8') ds.get_data() - self.assertEqual( - 'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8', ds.metadata['instance-id']) + self.assertEqual(self.instance_id, ds.metadata['instance-id']) def test_instance_id_from_dmidecode_used(self): ds = self._get_ds({'ovfcontent': construct_valid_ovf_env()}) @@ -184,5 +184,6 @@ markers = instance_name: the name to be used for the test instance sru_2020_11: test is part of the 2020/11 SRU verification sru_2021_01: test is part of the 2021/01 SRU verification + sru_next: test is part of the next SRU verification ubuntu: this test should run on Ubuntu unstable: skip this test because it is flakey |