### Bootstrapping clusters with
<img alt="NixOS" src="https://github.com/NixOS/nixos-artwork/raw/master/logo/nixos-white.svg" style="border: none; background: none; box-shadow: none;">
---
### The Setup
* 1 Management Node
* 3 Storage Nodes
* 16 Compute Nodes
---
### Network
* InfiniBand
* 1 Gbit/s Ethernet Data
* 1 Gbit/s Ethernet Management (IPMI)
---
<!-- .slide: data-background="https://i.imgflip.com/8hcwty.jpg" data-background-size="contain" data-background-repeat="no-repeat" -->
---
### Colmena
```nix=
{
  # Each top-level attribute names one host; its value is that host's config.
  host-a = {
    # Config for host A goes here
  };
  host-b = {
    # Config for host B goes here
  };
}
```
---
### Compute Nodes
```nix=
# One attribute per compute node, built from the entries of ./machines/nodes.nix
# (assumed to evaluate to a list, since it is passed to imap0).
nodes = listToAttrs
  (imap0
    (nodeIndex: nodeConfig: {
      # Zero-based index padded to two digits: "node-00", "node-01", ...
      name = "node-${fixedWidthNumber 2 nodeIndex}";
      value = {
        imports = [
          ./machines/node
        ];
        # Pass the index and the per-node entry to the imported module as
        # module arguments.
        _module.args = {
          inherit nodeIndex nodeConfig;
        };
      };
    })
    (import ./machines/nodes.nix));
```
---
### Local Cache
```nix=
# Enable nix-serve on this machine, using the given secret key file.
services.nix-serve = {
  enable = true;
  secretKeyFile = "/path/to/cache/private-key.pem";
};
```
---
### Installer Image
```nix=
{
  # Start from the minimal netboot installer profile shipped with nixpkgs.
  imports = [ "${modulesPath}/installer/netboot/netboot-minimal.nix" ];

  # Substituter used by the installer image.
  nix.settings.substituters = [ "http://manager.cluster" ];

  # One-shot unit that runs ./auto-install. It asserts on the `nixos.install`
  # kernel command line option and force-reboots the machine on failure.
  systemd.services.auto-install = {
    unitConfig.AssertKernelCommandLine = "nixos.install";
    unitConfig.FailureAction = "reboot-force";

    serviceConfig.Type = "oneshot";
    serviceConfig.ExecStart = ./auto-install;
  };
}
```
---
### Installer Script
```bash=
# Read the path passed as nixos.install=<path> on the kernel command line.
if [[ "$(cat /proc/cmdline)" =~ nixos\.install=([^ ]+) ]]; then
  INSTALL="''${BASH_REMATCH[1]}"
else
  # `return` is only valid inside a function or a sourced script; at the top
  # level of an executed script we have to abort with `exit`.
  echo "auto-install: nixos.install=<path> missing from kernel command line" >&2
  exit 1
fi

# Realise the store path and register /tmp/install as a GC root for it.
${pkgs.nix}/bin/nix-store \
  --realize \
  --add-root /tmp/install \
  "$INSTALL"

# Hand over to the realised script.
exec /tmp/install
```
---
### PXE Boot
https://github.com/danderson/netboot/tree/main/pixiecore
---
### PXE Boot
```nix=
# Pixiecore in "api" mode, pointed at the API location served by nginx below.
services.pixiecore = {
  enable = true;
  mode = "api";
  apiServer = "http://boot.cluster/api";
};

# The boot.cluster virtual host proxies to the local pixiecore ports and
# serves the `api` derivation under /api.
services.nginx = {
  virtualHosts."boot.cluster".locations =
    let
      pixiecore = config.services.pixiecore;
      onLocalhost = port: "http://localhost:${toString port}";
    in
    {
      "/".proxyPass = onLocalhost pixiecore.port;
      "/status".proxyPass = onLocalhost pixiecore.statusPort;
      "/api".root = api;
    };
};
```
---
### PXE API
```nix=
# Link farm with one entry per target machine, keyed by the hwAddress of that
# machine's "data" DHCP reservation and pointing at its generated API entry.
api = pkgs.linkFarm "pixiecore-api"
  (listToAttrs
    (map
      (name: {
        name = "pixiecore/v1/boot/${nodes.${name}.config.hpc.dhcp.reservations."data".hwAddress}";
        value = apiEntry name;
      })
      targets)); # All machines with net-install
```
---
### PXE API Entry
```nix=
# Write the JSON boot description for machine `name`: installer kernel and
# initrd, kernel command line, and a boot message.
apiEntry = name:
  pkgs.writeText "pixieboot-api-${name}"
    (builtins.toJSON {
      kernel = "file://${installer.config.system.build.kernel}/bzImage";
      initrd = [ "file://${installer.config.system.build.netbootRamdisk}/initrd" ];
      cmdline = concatStringsSep " " [
        "init=${boot.toplevel}/init"
        "loglevel=4"
        "nixos.install=${install}" # Finally, the installer script...
        "console=tty0"
        "console=ttyS1,57600n8"
      ];
      message = "NixOS Automatic Installer for ${name}";
    });
```
---
### Installer Script
```nix=
# Build outputs of the target machine's configuration.
node = nodes.${name}.config.system.build;

# Per-machine install script: run the disko script, install the prebuilt
# system closure into /mnt, set the chassis boot device to disk via IPMI,
# then reboot.
# The script body is indented uniformly; Nix strips that common indentation,
# so the generated file is unchanged.
install = pkgs.writeScript "install-${name}" ''
  "${node.diskoScript}"
  "${node.nixos-install}/bin/nixos-install" \
  --root /mnt \
  --system "${node.toplevel}" \
  --no-channel-copy \
  --no-root-password \
  --verbose
  ${pkgs.ipmitool}/bin/ipmitool chassis bootdev disk
  reboot
'';
```
---
# Thanks
* https://gogs.informatik.hs-fulda.de/hpc/nixcfg
{"title":"Bootstrapping clusters out of ~~nothing~~ nix","tags":"presentation","type":"slide","slideOptions":{"theme":"league","transition":"fade"}}