From 66e2d42dd1b722dc8e59f4e5990cea54f81ccd2a Mon Sep 17 00:00:00 2001
From: Chad Smith <chad.smith@canonical.com>
Date: Fri, 19 Feb 2021 15:37:57 -0700
Subject: azure: case-insensitive UUID to avoid new IID during kernel upgrade
 (#798)

Kernel's newer than 4.15 present /sys/dmi/id/product_uuid as a
lowercase value. Previously UUID was uppercase.

Azure datasource reads the product_uuid directly as their platform's
instance-id. This presents a problem if a kernel is either
upgraded or downgraded across the 4.15 kernel version boundary because
the case of the UUID will change, resulting in cloud-init seeing a
"new" instance id and re-running all modules.

Re-running cc_ssh in cloud-init deletes and regenerates ssh_host keys
on a system which can cause concern on long-running instances that
somethingnefarious has happened.

Also add:
- An integration test for this for Azure Bionic Ubuntu FIPS upgrading from
   a FIPS kernel with uppercase UUID to a lowercase UUID in linux-azure
- A new pytest.mark.sru_next to collect all integration tests related to our
   next SRU

LP: #1835584
---
 tests/integration_tests/bugs/test_lp1835584.py | 104 +++++++++++++++++++++++++
 tests/integration_tests/instances.py           |   6 +-
 tests/unittests/test_datasource/test_azure.py  |  39 ++++++++--
 3 files changed, 142 insertions(+), 7 deletions(-)
 create mode 100644 tests/integration_tests/bugs/test_lp1835584.py

(limited to 'tests')

diff --git a/tests/integration_tests/bugs/test_lp1835584.py b/tests/integration_tests/bugs/test_lp1835584.py
new file mode 100644
index 00000000..660d2a2a
--- /dev/null
+++ b/tests/integration_tests/bugs/test_lp1835584.py
@@ -0,0 +1,104 @@
+""" Integration test for LP #1835584
+
+Upstream linux kernels prior to 4.15 providate DMI product_uuid in uppercase.
+More recent kernels switched to lowercase for DMI product_uuid. Azure
+datasource uses this product_uuid as the instance-id for cloud-init.
+
+The linux-azure-fips kernel installed in PRO FIPs images, that product UUID is
+uppercase whereas the linux-azure cloud-optimized kernel reports the UUID as
+lowercase.
+
+In cases where product_uuid changes case, ensure cloud-init doesn't
+recreate ssh hostkeys across reboot (due to detecting an instance_id change).
+
+This currently only affects linux-azure-fips -> linux-azure on Bionic.
+This test won't run on Xenial because both linux-azure-fips and linux-azure
+report uppercase product_uuids.
+
+The test will launch a specific Bionic Ubuntu PRO FIPS image which has a
+linux-azure-fips kernel known to report product_uuid as uppercase. Then upgrade
+and reboot into linux-azure kernel which is known to report product_uuid as
+lowercase.
+
+Across the reboot, assert that we didn't re-run config_ssh by virtue of
+seeing only one semaphore creation log entry of type:
+
+ Writing to /var/lib/cloud/instances/<UUID>/sem/config_ssh -
+
+https://bugs.launchpad.net/cloud-init/+bug/1835584
+"""
+import re
+
+import pytest
+
+from tests.integration_tests.instances import IntegrationAzureInstance
+from tests.integration_tests.clouds import (
+    ImageSpecification, IntegrationCloud
+)
+from tests.integration_tests.conftest import get_validated_source
+
+
+IMG_AZURE_UBUNTU_PRO_FIPS_BIONIC = (
+    "Canonical:0001-com-ubuntu-pro-bionic-fips:pro-fips-18_04:18.04.202010201"
+)
+
+
+def _check_iid_insensitive_across_kernel_upgrade(
+    instance: IntegrationAzureInstance
+):
+    uuid = instance.read_from_file("/sys/class/dmi/id/product_uuid")
+    assert uuid.isupper(), (
+        "Expected uppercase UUID on Ubuntu FIPS image {}".format(
+            uuid
+        )
+    )
+    orig_kernel = instance.execute("uname -r").strip()
+    assert "azure-fips" in orig_kernel
+    result = instance.execute("apt-get update")
+    # Install a 5.4+ kernel which provides lowercase product_uuid
+    result = instance.execute("apt-get install linux-azure --assume-yes")
+    if not result.ok:
+        pytest.fail("Unable to install linux-azure kernel: {}".format(result))
+    instance.restart()
+    new_kernel = instance.execute("uname -r").strip()
+    assert orig_kernel != new_kernel
+    assert "azure-fips" not in new_kernel
+    assert "azure" in new_kernel
+    new_uuid = instance.read_from_file("/sys/class/dmi/id/product_uuid")
+    assert (
+        uuid.lower() == new_uuid
+    ), "Expected UUID on linux-azure to be lowercase of FIPS: {}".format(uuid)
+    log = instance.read_from_file("/var/log/cloud-init.log")
+    RE_CONFIG_SSH_SEMAPHORE = r"Writing.*sem/config_ssh "
+    ssh_runs = len(re.findall(RE_CONFIG_SSH_SEMAPHORE, log))
+    assert 1 == ssh_runs, "config_ssh ran too many times {}".format(ssh_runs)
+
+
+@pytest.mark.azure
+@pytest.mark.sru_next
+def test_azure_kernel_upgrade_case_insensitive_uuid(
+    session_cloud: IntegrationCloud
+):
+    cfg_image_spec = ImageSpecification.from_os_image()
+    if (cfg_image_spec.os, cfg_image_spec.release) != ("ubuntu", "bionic"):
+        pytest.skip(
+            "Test only supports ubuntu:bionic not {0.os}:{0.release}".format(
+                cfg_image_spec
+            )
+        )
+    source = get_validated_source(session_cloud)
+    if not source.installs_new_version():
+        pytest.skip(
+            "Provide CLOUD_INIT_SOURCE to install expected working cloud-init"
+        )
+    image_id = IMG_AZURE_UBUNTU_PRO_FIPS_BIONIC
+    with session_cloud.launch(
+        launch_kwargs={"image_id": image_id}
+    ) as instance:
+        # We can't use setup_image fixture here because we want to avoid
+        # taking a snapshot or cleaning the booted machine after cloud-init
+        # upgrade.
+        instance.install_new_cloud_init(
+            source, take_snapshot=False, clean=False
+        )
+        _check_iid_insensitive_across_kernel_upgrade(instance)
diff --git a/tests/integration_tests/instances.py b/tests/integration_tests/instances.py
index 0d9852c3..055ec758 100644
--- a/tests/integration_tests/instances.py
+++ b/tests/integration_tests/instances.py
@@ -116,7 +116,8 @@ class IntegrationInstance:
     def install_new_cloud_init(
         self,
         source: CloudInitSource,
-        take_snapshot=True
+        take_snapshot=True,
+        clean=True,
     ):
         if source == CloudInitSource.DEB_PACKAGE:
             self.install_deb()
@@ -133,7 +134,8 @@ class IntegrationInstance:
             )
         version = self.execute('cloud-init -v').split()[-1]
         log.info('Installed cloud-init version: %s', version)
-        self.instance.clean()
+        if clean:
+            self.instance.clean()
         if take_snapshot:
             snapshot_id = self.snapshot()
             self.cloud.snapshot_id = snapshot_id
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index dc615309..152a2e1a 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -201,6 +201,7 @@ IMDS_NETWORK_METADATA = {
 }
 
 MOCKPATH = 'cloudinit.sources.DataSourceAzure.'
+EXAMPLE_UUID = 'd0df4c54-4ecb-4a4b-9954-5bdf3ed5c3b8'
 
 
 class TestParseNetworkConfig(CiTestCase):
@@ -630,7 +631,7 @@ scbus-1 on xpt0 bus 0
         return dsaz
 
     def _get_ds(self, data, agent_command=None, distro='ubuntu',
-                apply_network=None):
+                apply_network=None, instance_id=None):
 
         def dsdevs():
             return data.get('dsdevs', [])
@@ -659,7 +660,10 @@ scbus-1 on xpt0 bus 0
         self.m_ephemeral_dhcpv4 = mock.MagicMock()
         self.m_ephemeral_dhcpv4_with_reporting = mock.MagicMock()
 
-        self.instance_id = 'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8'
+        if instance_id:
+            self.instance_id = instance_id
+        else:
+            self.instance_id = EXAMPLE_UUID
 
         def _dmi_mocks(key):
             if key == 'system-uuid':
@@ -910,7 +914,7 @@ scbus-1 on xpt0 bus 0
             'azure_data': {
                 'configurationsettype': 'LinuxProvisioningConfiguration'},
             'imds': NETWORK_METADATA,
-            'instance-id': 'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8',
+            'instance-id': EXAMPLE_UUID,
             'local-hostname': u'myhost',
             'random_seed': 'wild'}
 
@@ -1613,6 +1617,32 @@ scbus-1 on xpt0 bus 0
         self.assertTrue(ret)
         self.assertEqual('value', dsrc.metadata['test'])
 
+    def test_instance_id_case_insensitive(self):
+        """Return the previous iid when current is a case-insensitive match."""
+        lower_iid = EXAMPLE_UUID.lower()
+        upper_iid = EXAMPLE_UUID.upper()
+        # lowercase current UUID
+        ds = self._get_ds(
+            {'ovfcontent': construct_valid_ovf_env()}, instance_id=lower_iid
+        )
+        # UPPERCASE previous
+        write_file(
+            os.path.join(self.paths.cloud_dir, 'data', 'instance-id'),
+            upper_iid)
+        ds.get_data()
+        self.assertEqual(upper_iid, ds.metadata['instance-id'])
+
+        # UPPERCASE current UUID
+        ds = self._get_ds(
+            {'ovfcontent': construct_valid_ovf_env()}, instance_id=upper_iid
+        )
+        # lowercase previous
+        write_file(
+            os.path.join(self.paths.cloud_dir, 'data', 'instance-id'),
+            lower_iid)
+        ds.get_data()
+        self.assertEqual(lower_iid, ds.metadata['instance-id'])
+
     def test_instance_id_endianness(self):
         """Return the previous iid when dmi uuid is the byteswapped iid."""
         ds = self._get_ds({'ovfcontent': construct_valid_ovf_env()})
@@ -1628,8 +1658,7 @@ scbus-1 on xpt0 bus 0
             os.path.join(self.paths.cloud_dir, 'data', 'instance-id'),
             '644CDFD0-CB4E-4B4A-9954-5BDF3ED5C3B8')
         ds.get_data()
-        self.assertEqual(
-            'D0DF4C54-4ECB-4A4B-9954-5BDF3ED5C3B8', ds.metadata['instance-id'])
+        self.assertEqual(self.instance_id, ds.metadata['instance-id'])
 
     def test_instance_id_from_dmidecode_used(self):
         ds = self._get_ds({'ovfcontent': construct_valid_ovf_env()})
-- 
cgit v1.2.3