Skip to content

mini design of ZTP based onie switch discovery and configuration

yangsong edited this page Jun 2, 2017 · 4 revisions
  1. Predefine the switch on the MN according to the network plan:
[root@fs3 ~]# lsdef mid05tor10
Object name: mid05tor10
    groups=switch
    ip=172.21.205.10
    nodetype=switch
    switch=mgmtsw01
    switchport=10
    usercomment=Edgecore Networks Switch

Then run "makehosts" on the MN.

  2. After running "makedhcp -n" on the MN, the /etc/dhcp/dhcpd.conf on the MN looks like:
[root@fs3 ~]# cat /etc/dhcp/dhcpd.conf
#xCAT generated dhcp configuration
option conf-file code 209 = text;
option space isan;
option isan-encap-opts code 43 = encapsulate isan;
option isan.iqn code 203 = string;
option isan.root-path code 201 = string;
option space gpxe;
option gpxe-encap-opts code 175 = encapsulate gpxe;
option gpxe.bus-id code 177 = string;
option user-class-identifier code 77 = string;
option gpxe.no-pxedhcp code 176 = unsigned integer 8;
option tcode code 101 = text;
option iscsi-initiator-iqn code 203 = string;
ddns-update-style interim;
ignore client-updates;
option client-architecture code 93 = unsigned integer 16;
option tcode "America/New_York";
option gpxe.no-pxedhcp 1;
option www-server code 114 = string;
option cumulus-provision-url code 239 = text;
omapi-port 7911;
key xcat_key {
  algorithm hmac-md5;
  secret "b0tudGdnTHdqSmFZWmpwR3JTWTdiaXRPMlF1REI5c1k=";
};
omapi-key xcat_key;
class "pxe" {
   match if substring (option vendor-class-identifier, 0, 9) = "PXEClient";
   ddns-updates off;
    max-lease-time 600;
}
shared-network enP3p3s0d1 {
  subnet 172.21.0.0 netmask 255.255.0.0 {
    authoritative;
    max-lease-time 43200;
    min-lease-time 43200;
    default-lease-time 43200;
    option routers  172.21.253.27;
    next-server  172.21.253.27;
    option log-servers 172.21.253.27;
    option ntp-servers 172.21.253.27;
    option domain-name "pok.stglabs.ibm.com";
    option domain-name-servers  172.21.253.27;
    option domain-search  "pok.stglabs.ibm.com";
    option cumulus-provision-url "http://172.21.253.27/install/postscripts/onieztp";
    zone pok.stglabs.ibm.com. {
       primary 172.21.253.27; key xcat_key;
    }
    zone 21.172.IN-ADDR.ARPA. {
       primary 172.21.253.27; key xcat_key;
    }
    if option user-class-identifier = "xNBA" and option client-architecture = 00:00 { #x86, xCAT Network Boot Agent
        always-broadcast on;
        filename = "http://172.21.253.27/tftpboot/xcat/xnba/nets/172.21.0.0_16";
    } else if option user-class-identifier = "xNBA" and option client-architecture = 00:09 { #x86, xCAT Network Boot Agent
        filename = "http://172.21.253.27/tftpboot/xcat/xnba/nets/172.21.0.0_16.uefi";
    } else if option client-architecture = 00:00  { #x86
        filename "xcat/xnba.kpxe";
    } else if option vendor-class-identifier = "Etherboot-5.4"  { #x86
        filename "xcat/xnba.kpxe";
    } else if option client-architecture = 00:07 { #x86_64 uefi
         filename "xcat/xnba.efi";
    } else if option client-architecture = 00:09 { #x86_64 uefi alternative id
         filename "xcat/xnba.efi";
    } else if option client-architecture = 00:02 { #ia64
         filename "elilo.efi";
    } else if option client-architecture = 00:0e { #OPAL-v3
         option conf-file = "http://172.21.253.27/tftpboot/pxelinux.cfg/p/172.21.0.0_16";
    } else if substring (option vendor-class-identifier,0,11) = "onie_vendor" { #for onie on cumulus switch
        option www-server = "http://172.21.253.27/install/onie/onie-installer";
    } else if substring(filename,0,1) = null { #otherwise, provide yaboot if the client isn't specific
         filename "/yaboot";
    }
    range dynamic-bootp 172.21.253.100 172.21.253.200;
  } # 172.21.0.0/255.255.0.0 subnet_end
} # enP3p3s0d1 nic_end
shared-network enP3p3s0 {
  subnet 172.20.0.0 netmask 255.255.0.0 {
    authoritative;
    max-lease-time 43200;
    min-lease-time 43200;
    default-lease-time 43200;
    option routers  172.20.253.27;
    next-server  172.20.253.27;
    option log-servers 172.20.253.27;
    option ntp-servers 172.20.253.27;
    option domain-name "pok.stglabs.ibm.com";
    option domain-name-servers  172.21.253.27;
    option interface-mtu 9000;
    option domain-search  "pok.stglabs.ibm.com";
    option cumulus-provision-url "http://172.20.253.27/install/postscripts/onieztp";
    zone pok.stglabs.ibm.com. {
       primary 172.21.253.27; key xcat_key;
    }
    zone 20.172.IN-ADDR.ARPA. {
       primary 172.21.253.27; key xcat_key;
    }
    if option user-class-identifier = "xNBA" and option client-architecture = 00:00 { #x86, xCAT Network Boot Agent
        always-broadcast on;
        filename = "http://172.20.253.27/tftpboot/xcat/xnba/nets/172.20.0.0_16";
    } else if option user-class-identifier = "xNBA" and option client-architecture = 00:09 { #x86, xCAT Network Boot Agent
        filename = "http://172.20.253.27/tftpboot/xcat/xnba/nets/172.20.0.0_16.uefi";
    } else if option client-architecture = 00:00  { #x86
        filename "xcat/xnba.kpxe";
    } else if option vendor-class-identifier = "Etherboot-5.4"  { #x86
        filename "xcat/xnba.kpxe";
    } else if option client-architecture = 00:07 { #x86_64 uefi
         filename "xcat/xnba.efi";
    } else if option client-architecture = 00:09 { #x86_64 uefi alternative id
         filename "xcat/xnba.efi";
    } else if option client-architecture = 00:02 { #ia64
         filename "elilo.efi";
    } else if option client-architecture = 00:0e { #OPAL-v3
         option conf-file = "http://172.20.253.27/tftpboot/pxelinux.cfg/p/172.20.0.0_16";
    } else if substring (option vendor-class-identifier,0,11) = "onie_vendor" { #for onie on cumulus switch
        option www-server = "http://172.20.253.27/install/onie/onie-installer";
    } else if substring(filename,0,1) = null { #otherwise, provide yaboot if the client isn't specific
         filename "/yaboot";
    }
    range dynamic-bootp 172.20.253.100 172.20.253.200;
  } # 172.20.0.0/255.255.0.0 subnet_end
} # enP3p3s0 nic_end

  3. Plug the mgt interface of the ONIE switch into the core or mid switch and power it on.

Prerequisites: (1) Cumulus OS has been installed on the ONIE switch. (2) ZTP is enabled (with "ztp -R" or on a fresh install).

The ONIE switch will get the IP address for its mgt interface via DHCP, then download and execute the ZTP script "http://172.20.253.27/install/postscripts/onieztp".

Just like xCAT-genesis-scripts/bin/doxcat, the workflow is:

(1) documulusdiscovery (draft version: https://github.com/xcat2/xcat-core/pull/3129): the script obtains the switch information, such as the MAC of the mgt interface, arch, mts, and serial number, then builds a "findme" request containing that information and sends the request to MN:3001 via UDP. When xcatd on the MN receives this "findme" request, the MAC address in the request is looked up in the MAC table of the parent switch "mgmtsw01" to find the switch port on the parent switch; the predefined switch definition is then looked up in the xCAT DB according to its "switch" and "switchport" attributes:

    switch=mgmtsw01
    switchport=10

On success, the switch definition is updated with the information in the "findme" request, and the switch is then notified that it has been discovered.

[root@fs3 ~]# lsdef mid05tor10
Object name: mid05tor10
    arch=armv7l
    groups=switch
    ip=172.21.205.10
    mac=8c:ea:1b:e8:78:c0
    nodetype=switch
    serial=11S01FT690YA50YD73EACH
    supportedarchs=armv7l
    switch=mgmtsw01
    switchport=10
    usercomment=Edgecore Networks Switch

(2) Establish a TCP connection to xcatd (3001) on the MN to (send the request and) listen for any response from the MN (like minixcatd.awk). (TODO)
* If a discovery success notification is received from the MN, "ifdown+ifup" the mgt interface to apply the specified IP address to it, then go to step (3).
* Otherwise, retry "documulusdiscovery" until it succeeds or the maximum number of retries is exceeded.
* If the switch still cannot be discovered after the maximum number of retries, run "ztp -R" and exit; there should be messages in the clusters.log on the MN to make the admin aware of this.

(3) onieztp (it would be better to rename it to configonie)
draft version: https://github.com/xcat2/xcat-core/pull/3085
It performs the basic configuration of the switch, including:
a. Add Debian repositories (this requires public internet access, which is not available in most clusters).
b. Pull the root ssh keys and configure passwordless ssh; it should also make sure that compute nodes cannot ssh to the switch without a password. (TODO)
c. Enable and configure snmpd.
d. Configure the base interfaces, including:
* I. create the default bridge
* II. create interfaces for the data ports and attach them to the bridge
e. Configure the static network configuration for the mgt interface; this should preferably leverage "confignics -s". (TODO)
configonie should be idempotent, so that it does not affect a switch that has already been configured. (TODO)
Notify the MN of the configuration status. (TODO)

(4) Enable chain in the ZTP-based switch discovery process. (TODO, NICE TO HAVE)
(5) What if we convert this discovery and configuration workflow into a systemd/sysvinit service unit in Cumulus? (TODO, NEED TO EVALUATE)

News

History

  • Oct 22, 2010: xCAT 2.5 released.
  • Apr 30, 2010: xCAT 2.4 is released.
  • Oct 31, 2009: xCAT 2.3 released. xCAT's 10 year anniversary!
  • Apr 16, 2009: xCAT 2.2 released.
  • Oct 31, 2008: xCAT 2.1 released.
  • Sep 12, 2008: Support for xCAT 2 can now be purchased!
  • June 9, 2008: xCAT breathes life into (at the time) the fastest supercomputer on the planet
  • May 30, 2008: xCAT 2.0 for Linux officially released!
  • Oct 31, 2007: IBM open sources xCAT 2.0 to allow collaboration among all of the xCAT users.
  • Oct 31, 1999: xCAT 1.0 is born!
    xCAT started out as a project in IBM developed by Egan Ford. It was quickly adopted by customers and IBM manufacturing sites to rapidly deploy clusters.
Clone this wiki locally