From 3d10b8d080a874be022f9e25063ba77f0293c5e8 Mon Sep 17 00:00:00 2001
From: Scott Moser <smoser@ubuntu.com>
Date: Thu, 25 Jul 2013 14:37:10 -0400
Subject: azure: support bouncing interfaces to publish new hostname

See the added doc/sources/azure/README.rst for why this is necessary.
Essentially, we now are doing the following in the get_data() method
of azure datasource to publish this NewHostname:
 hostname NewHostName
 ifdown eth0;
 ifup eth0

LP: #1202758
---
 cloudinit/sources/DataSourceAzure.py          | 114 +++++++++++++++++-----
 doc/examples/cloud-config-datasources.txt     |   5 +
 doc/sources/azure/README.rst                  | 134 ++++++++++++++++++++++++++
 tests/unittests/test_datasource/test_azure.py |  77 ++++++++++++++-
 4 files changed, 300 insertions(+), 30 deletions(-)
 create mode 100644 doc/sources/azure/README.rst

diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 0a5caebe..30b06fef 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -31,9 +31,20 @@ LOG = logging.getLogger(__name__)
 DS_NAME = 'Azure'
 DEFAULT_METADATA = {"instance-id": "iid-AZURE-NODE"}
 AGENT_START = ['service', 'walinuxagent', 'start']
-BUILTIN_DS_CONFIG = {'datasource': {DS_NAME: {
-     'agent_command': AGENT_START,
-     'data_dir': "/var/lib/waagent"}}}
+BOUNCE_COMMAND = ("i=$interface; x=0; ifdown $i || x=$?; "
+                  "ifup $i || x=$?; exit $x")
+BUILTIN_DS_CONFIG = {
+    'agent_command': AGENT_START,
+    'data_dir': "/var/lib/waagent",
+    'set_hostname': True,
+    'hostname_bounce': {
+        'interface': 'eth0',
+        'policy': True,
+        'command': BOUNCE_COMMAND,
+        'hostname_command': 'hostname',
+    }
+}
+DS_CFG_PATH = ['datasource', DS_NAME]
 
 
 class DataSourceAzureNet(sources.DataSource):
@@ -42,19 +53,19 @@ class DataSourceAzureNet(sources.DataSource):
         self.seed_dir = os.path.join(paths.seed_dir, 'azure')
         self.cfg = {}
         self.seed = None
+        self.ds_cfg = util.mergemanydict([
+            util.get_cfg_by_path(sys_cfg, DS_CFG_PATH),
+            BUILTIN_DS_CONFIG])
 
     def __str__(self):
         root = sources.DataSource.__str__(self)
         return "%s [seed=%s]" % (root, self.seed)
 
     def get_data(self):
-        ddir_cfgpath = ['datasource', DS_NAME, 'data_dir']
         # azure removes/ejects the cdrom containing the ovf-env.xml
         # file on reboot.  So, in order to successfully reboot we
         # need to look in the datadir and consider that valid
-        ddir = util.get_cfg_by_path(self.sys_cfg, ddir_cfgpath)
-        if ddir is None:
-            ddir = util.get_cfg_by_path(BUILTIN_DS_CONFIG, ddir_cfgpath)
+        ddir = self.ds_cfg['data_dir']
 
         candidates = [self.seed_dir]
         candidates.extend(list_possible_azure_ds_devs())
@@ -91,36 +102,40 @@ class DataSourceAzureNet(sources.DataSource):
             return False
 
         if found == ddir:
-            LOG.debug("using cached datasource in %s", ddir)
-
-        fields = [('cmd', ['datasource', DS_NAME, 'agent_command']),
-                  ('datadir', ddir_cfgpath)]
-        mycfg = {}
-        for cfg in (self.cfg, self.sys_cfg, BUILTIN_DS_CONFIG):
-            for name, path in fields:
-                if name in mycfg:
-                    continue
-                value = util.get_cfg_by_path(cfg, keyp=path)
-                if value is not None:
-                    mycfg[name] = value
+            LOG.debug("using files cached in %s", ddir)
+
+        # now update ds_cfg to reflect contents passed in config
+        usercfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {})
+        self.ds_cfg = util.mergemanydict([usercfg, self.ds_cfg])
+        mycfg = self.ds_cfg
 
         # walinux agent writes files world readable, but expects
         # the directory to be protected.
-        write_files(mycfg['datadir'], files, dirmode=0700)
+        write_files(mycfg['data_dir'], files, dirmode=0700)
+
+        # handle the hostname 'publishing'
+        try:
+            handle_set_hostname(mycfg.get('set_hostname'),
+                                self.metadata.get('local-hostname'),
+                                mycfg['hostname_bounce'])
+        except Exception as e:
+            LOG.warn("Failed publishing hostname: %s" % e)
+            util.logexc(LOG, "handling set_hostname failed")
 
         try:
-            invoke_agent(mycfg['cmd'])
+            invoke_agent(mycfg['agent_command'])
         except util.ProcessExecutionError:
             # claim the datasource even if the command failed
-            util.logexc(LOG, "agent command '%s' failed.", mycfg['cmd'])
+            util.logexc(LOG, "agent command '%s' failed.",
+                        mycfg['agent_command'])
 
-        shcfgxml = os.path.join(mycfg['datadir'], "SharedConfig.xml")
+        shcfgxml = os.path.join(mycfg['data_dir'], "SharedConfig.xml")
         wait_for = [shcfgxml]
 
         fp_files = []
         for pk in self.cfg.get('_pubkeys', []):
             bname = pk['fingerprint'] + ".crt"
-            fp_files += [os.path.join(mycfg['datadir'], bname)]
+            fp_files += [os.path.join(mycfg['data_dir'], bname)]
 
         start = time.time()
         missing = wait_for_files(wait_for + fp_files)
@@ -148,6 +163,43 @@ class DataSourceAzureNet(sources.DataSource):
         return self.cfg
 
 
+def handle_set_hostname(enabled, hostname, cfg):
+    # hand 'hostname' to apply_hostname_bounce with the settings in 'cfg';
+    # does nothing when 'enabled' is not true or no hostname was given.
+    if not util.is_true(enabled):
+        return
+    if not hostname:
+        LOG.warn("set_hostname was true but no local-hostname")
+        return
+
+    bounce_args = dict((k, cfg[k]) for k in
+                       ('policy', 'interface', 'command', 'hostname_command'))
+    apply_hostname_bounce(hostname=hostname, **bounce_args)
+
+
+def apply_hostname_bounce(hostname, policy, interface, command,
+                          hostname_command="hostname"):
+    # Set the hostname via hostname_command, then bounce 'interface' with
+    # 'command' ("builtin" means BOUNCE_COMMAND) unless policy is false or
+    # the hostname was already set and policy is not "force".
+    prev_hostname = util.subp(hostname_command, capture=True)[0].strip()
+
+    util.subp([hostname_command, hostname])
+
+    if util.is_false(policy):
+        return
+    if prev_hostname == hostname and policy != "force":
+        return
+
+    if command == "builtin":
+        command = BOUNCE_COMMAND
+
+    # the bounce command reads the interface name from its environment,
+    # so the modified environment must actually be handed to the child.
+    env = dict(os.environ, interface=interface)
+    util.subp(command, shell=(not isinstance(command, list)), capture=True,
+              env=env)
+
+
 def crtfile_to_pubkey(fname):
     pipeline = ('openssl x509 -noout -pubkey < "$0" |'
                 'ssh-keygen -i -m PKCS8 -f /dev/stdin')
@@ -319,15 +371,21 @@ def read_azure_ovf(contents):
         name = child.localName.lower()
 
         simple = False
+        value = ""
         if (len(child.childNodes) == 1 and
             child.childNodes[0].nodeType == dom.TEXT_NODE):
             simple = True
             value = child.childNodes[0].wholeText
 
+        attrs = {k: v for k, v in child.attributes.items()}
+
         # we accept either UserData or CustomData.  If both are present
         # then behavior is undefined.
         if (name == "userdata" or name == "customdata"):
-            ud = base64.b64decode(''.join(value.split()))
+            if attrs.get('encoding') in (None, "base64"):
+                ud = base64.b64decode(''.join(value.split()))
+            else:
+                ud = value
         elif name == "username":
             username = value
         elif name == "userpassword":
@@ -335,7 +393,11 @@ def read_azure_ovf(contents):
         elif name == "hostname":
             md['local-hostname'] = value
         elif name == "dscfg":
-            cfg['datasource'] = {DS_NAME: util.load_yaml(value, default={})}
+            if attrs.get('encoding') in (None, "base64"):
+                dscfg = base64.b64decode(''.join(value.split()))
+            else:
+                dscfg = value
+            cfg['datasource'] = {DS_NAME: util.load_yaml(dscfg, default={})}
         elif name == "ssh":
             cfg['_pubkeys'] = load_azure_ovf_pubkeys(child)
         elif name == "disablesshpasswordauthentication":
diff --git a/doc/examples/cloud-config-datasources.txt b/doc/examples/cloud-config-datasources.txt
index a19353fc..6544448e 100644
--- a/doc/examples/cloud-config-datasources.txt
+++ b/doc/examples/cloud-config-datasources.txt
@@ -45,6 +45,11 @@ datasource:
 
   Azure:
     agent_command: [service, walinuxagent, start]
+    set_hostname: True
+    hostname_bounce:
+      interface: eth0
+      # policy can be 'on', 'off' or 'force'
+      policy: on
 
   SmartOS:
     # Smart OS datasource works over a serial console interacting with
diff --git a/doc/sources/azure/README.rst b/doc/sources/azure/README.rst
new file mode 100644
index 00000000..8239d1fa
--- /dev/null
+++ b/doc/sources/azure/README.rst
@@ -0,0 +1,134 @@
+================
+Azure Datasource
+================
+
+This datasource finds metadata and user-data from the Azure cloud platform.
+
+Azure Platform
+--------------
+The azure cloud-platform provides initial data to an instance via an attached
+CD formatted in UDF.  That CD contains a 'ovf-env.xml' file that provides some
+information.  Additional information is obtained via interaction with the
+"endpoint".  The ip address of the endpoint is advertised to the instance
+inside of dhcp option 245.  On ubuntu, that can be seen in
+/var/lib/dhcp/dhclient.eth0.leases as a colon delimited hex value (example:
+``option unknown-245 64:41:60:82;`` is 100.65.96.130)
+
+walinuxagent
+------------
+In order to operate correctly, cloud-init needs walinuxagent to provide much
+of the interaction with azure.  In addition to its "provisioning" code,
+walinuxagent is a long running daemon that handles the following things:
+
+- generate an x509 certificate and send that to the endpoint
+
+waagent.conf config
+~~~~~~~~~~~~~~~~~~~
+In order to use waagent.conf with cloud-init, the following settings are recommended.  Other values can be changed or set to the defaults.
+
+  ::
+
+   # disabling provisioning turns off all 'Provisioning.*' function
+   Provisioning.Enabled=n
+   # this is currently not handled by cloud-init, so let walinuxagent do it.
+   ResourceDisk.Format=y
+   ResourceDisk.MountPoint=/mnt
+
+
+Userdata
+--------
+Userdata is provided to cloud-init inside the ovf-env.xml file. Cloud-init
+expects that user-data will be provided as a base64 encoded value inside the
+text child of an element named ``UserData`` or ``CustomData`` which is a direct
+child of the ``LinuxProvisioningConfigurationSet`` (a sibling to ``UserName``).
+If both ``UserData`` and ``CustomData`` are provided behavior is undefined on
+which will be selected.
+
+In the example below, user-data provided is 'this is my userdata', and the
+datasource config provided is ``{"agent_command": ["start", "walinuxagent"]}``.
+That agent command will take effect as if it were specified in system config.
+
+Example:
+
+.. code::
+
+ <wa:ProvisioningSection>
+  <wa:Version>1.0</wa:Version>
+  <LinuxProvisioningConfigurationSet
+     xmlns="http://schemas.microsoft.com/windowsazure"
+     xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
+   <ConfigurationSetType>LinuxProvisioningConfiguration</ConfigurationSetType>
+   <HostName>myHost</HostName>
+   <UserName>myuser</UserName>
+   <UserPassword/>
+   <CustomData>dGhpcyBpcyBteSB1c2VyZGF0YQ==</CustomData>
+   <dscfg>eyJhZ2VudF9jb21tYW5kIjogWyJzdGFydCIsICJ3YWxpbnV4YWdlbnQiXX0=</dscfg>
+   <DisableSshPasswordAuthentication>true</DisableSshPasswordAuthentication>
+   <SSH>
+    <PublicKeys>
+     <PublicKey>
+      <Fingerprint>6BE7A7C3C8A8F4B123CCA5D0C2F1BE4CA7B63ED7</Fingerprint>
+      <Path>this-value-unused</Path>
+     </PublicKey>
+    </PublicKeys>
+   </SSH>
+   </LinuxProvisioningConfigurationSet>
+ </wa:ProvisioningSection>
+
+Configuration
+-------------
+Configuration for the datasource can be read from the system config or set
+via the `dscfg` entry in the `LinuxProvisioningConfigurationSet`.  Content in
+the dscfg node is expected to be base64 encoded yaml content, and it will be
+merged into the 'datasource: Azure' entry.
+
+The '``hostname_bounce: command``' entry can be either the literal string
+'builtin' or a command to execute.  The command will be invoked after the
+hostname is set, and will have the 'interface' in its environment.  If
+``set_hostname`` is not true, then ``hostname_bounce`` will be ignored.
+
+An example might be:
+  command:  ["sh", "-c", "killall dhclient; dhclient $interface"]
+
+.. code::
+
+  datasource:
+   Azure:
+    agent_command: [service, walinuxagent, start]
+    set_hostname: True
+    hostname_bounce:
+     # the name of the interface to bounce
+     interface: eth0
+     # policy can be 'on', 'off' or 'force'
+     policy: on
+     # the command used to bounce the interface: either the literal
+     # string "builtin" or a command to execute
+     command: "builtin"
+     # the command used to read and set the hostname
+     hostname_command: "hostname"
+
+hostname
+--------
+When the user launches an instance, they provide a hostname for that instance.
+The hostname is provided to the instance in the ovf-env.xml file as
+``HostName``.
+
+Whatever value the instance provides in its dhcp request will resolve in the
+domain returned in the 'search' request.
+
+The interesting issue is that a generic image will already have a hostname
+configured.  The ubuntu cloud images have 'ubuntu' as the hostname of the
+system, and the initial dhcp request on eth0 is not guaranteed to occur after
+the datasource code has been run.  So, on first boot, that initial value will
+be sent in the dhcp request and *that* value will resolve.
+
+In order to make the ``HostName`` provided in the ovf-env.xml resolve, a
+dhcp request must be made with the new value.  Walinuxagent (in its current
+version) handles this by polling the state of hostname and bouncing ('``ifdown
+eth0; ifup eth0``') the network interface if it sees that a change has been
+made.
+
+cloud-init handles this by setting the hostname in the DataSource's 'get_data'
+method via '``hostname $HostName``', and then bouncing the interface.  This
+behavior can be configured or disabled in the datasource config.  See
+'Configuration' above.
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index 2e8583f9..c944cb13 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -26,8 +26,15 @@ def construct_valid_ovf_env(data=None, pubkeys=None, userdata=None):
   xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
   <ConfigurationSetType>LinuxProvisioningConfiguration</ConfigurationSetType>
     """
-    for key, val in data.items():
-        content += "<%s>%s</%s>\n" % (key, val, key)
+    for key, dval in data.items():
+        if isinstance(dval, dict):
+            val = dval.get('text')
+            attrs = ' ' + ' '.join(["%s='%s'" % (k, v) for k, v in dval.items()
+                                    if k != 'text'])
+        else:
+            val = dval
+            attrs = ""
+        content += "<%s%s>%s</%s>\n" % (key, attrs, val, key)
 
     if userdata:
         content += "<UserData>%s</UserData>\n" % (base64.b64encode(userdata))
@@ -103,6 +110,9 @@ class TestAzureDataSource(MockerTestCase):
             data['iid_from_shared_cfg'] = path
             return 'i-my-azure-id'
 
+        def _apply_hostname_bounce(**kwargs):
+            data['apply_hostname_bounce'] = kwargs
+
         if data.get('ovfcontent') is not None:
             populate_dir(os.path.join(self.paths.seed_dir, "azure"),
                          {'ovf-env.xml': data['ovfcontent']})
@@ -118,7 +128,9 @@ class TestAzureDataSource(MockerTestCase):
                             (mod, 'pubkeys_from_crt_files',
                              _pubkeys_from_crt_files),
                             (mod, 'iid_from_shared_config',
-                             _iid_from_shared_config), ])
+                             _iid_from_shared_config),
+                            (mod, 'apply_hostname_bounce',
+                             _apply_hostname_bounce), ])
 
         dsrc = mod.DataSourceAzureNet(
             data.get('sys_cfg', {}), distro=None, paths=self.paths)
@@ -139,11 +151,26 @@ class TestAzureDataSource(MockerTestCase):
         self.assertEqual(0700, data['datadir_mode'])
         self.assertEqual(dsrc.metadata['instance-id'], 'i-my-azure-id')
 
+    def test_user_cfg_set_agent_command_plain(self):
+        # a dscfg node marked encoding='plain' is read as unencoded yaml
+        expected = {'agent_command': "my_command"}
+        dscfg_node = {'text': yaml.dump(expected), 'encoding': 'plain'}
+        ovf_fields = {'HostName': "myhost", 'UserName': "myuser",
+                      'dscfg': dscfg_node}
+        data = {'ovfcontent': construct_valid_ovf_env(data=ovf_fields)}
+
+        self.assertTrue(self._get_ds(data).get_data())
+        # the agent must have been invoked with the command from dscfg
+        self.assertEqual(data['agent_invoked'], expected['agent_command'])
+
     def test_user_cfg_set_agent_command(self):
+        # set dscfg in via base64 encoded yaml
         cfg = {'agent_command': "my_command"}
         odata = {'HostName': "myhost", 'UserName': "myuser",
-                'dscfg': yaml.dump(cfg)}
+                'dscfg': {'text': base64.b64encode(yaml.dump(cfg)),
+                          'encoding': 'base64'}}
         data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
+        print data
 
         dsrc = self._get_ds(data)
         ret = dsrc.get_data()
@@ -218,6 +245,48 @@ class TestAzureDataSource(MockerTestCase):
         for mypk in mypklist:
             self.assertIn(mypk, dsrc.cfg['_pubkeys'])
 
+    def test_disabled_bounce(self):
+        pass  # TODO: placeholder, no assertions implemented yet
+
+    def test_apply_bounce_call_1(self):
+        # hostname needs to get through to apply_hostname_bounce
+        odata = {'HostName': 'my-random-hostname'}
+        data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
+
+        # the mocked apply_hostname_bounce records its kwargs in data
+        self._get_ds(data).get_data()
+        self.assertIn('hostname', data['apply_hostname_bounce'])
+        self.assertEqual(data['apply_hostname_bounce']['hostname'],
+                         odata['HostName'])
+
+    def test_apply_bounce_call_configurable(self):
+        # hostname_bounce should be configurable in datasource cfg
+        bounce_cfg = {'interface': 'eth1', 'policy': 'off',
+                      'command': 'my-bounce-command',
+                      'hostname_command': 'my-hostname-command'}
+        dscfg = base64.b64encode(yaml.dump({'hostname_bounce': bounce_cfg}))
+        odata = {'HostName': "xhost",
+                 'dscfg': {'text': dscfg, 'encoding': 'base64'}}
+        data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
+        self._get_ds(data).get_data()
+
+        # every configured key must reach apply_hostname_bounce unchanged
+        called_with = data['apply_hostname_bounce']
+        for key, expected in bounce_cfg.items():
+            self.assertIn(key, called_with)
+            self.assertEqual(called_with[key], expected)
+
+    def test_set_hostname_disabled(self):
+        # with set_hostname false in dscfg, no bounce may be attempted
+        dscfg = base64.b64encode(yaml.dump({'set_hostname': False}))
+        odata = {'HostName': "xhost",
+                 'dscfg': {'text': dscfg, 'encoding': 'base64'}}
+        data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
+        self._get_ds(data).get_data()
+
+        # the mocked apply_hostname_bounce must never have been called
+        self.assertEqual(data.get('apply_hostname_bounce', "N/A"), "N/A")
+
 
 class TestReadAzureOvf(MockerTestCase):
     def test_invalid_xml_raises_non_azure_ds(self):
-- 
cgit v1.2.3