Cherry Pick PR857 #860

Merged · 2 commits · Sep 12, 2024

1 change: 1 addition & 0 deletions .gitignore
@@ -16,6 +16,7 @@ __pycache__
 /openapi/*
 !/openapi/Cargo.toml
 !/openapi/build.rs
+!/openapi/src/lib.rs
 /tests/bdd/autogen/
 /terraform/cluster/ansible-hosts
 /terraform/cluster/current_user.txt
1 change: 1 addition & 0 deletions nix/pkgs/control-plane/cargo-project.nix
@@ -54,6 +54,7 @@ let
 "k8s"
 "openapi/Cargo.toml"
 "openapi/build.rs"
+"openapi/src/lib.rs"
 "rpc"
 "scripts/rust/generate-openapi-bindings.sh"
 "scripts/rust/branch_ancestor.sh"
1 change: 1 addition & 0 deletions openapi/build.rs
@@ -19,5 +19,6 @@ fn main() {
     // directive against files created during the build of build.rs??
     // https://doc.rust-lang.org/cargo/reference/build-scripts.html#rerun-if-changed
     // println!("cargo:rerun-if-changed=.");
+    // println!("cargo:rerun-if-changed=./src/lib_.rs");
     // println!("cargo:rerun-if-changed=version.txt");
 }
1 change: 1 addition & 0 deletions openapi/src/lib.rs
@@ -0,0 +1 @@
+include!("lib_.rs");
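Note: this one-line shim is what ties the whole change together. The generator's output lands in src/lib_.rs, which stays ignored, while the git-tracked lib.rs merely include!-s it, so the crate keeps a stable entry point that .gitignore can whitelist and the nix source filter can copy.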
5 changes: 5 additions & 0 deletions scripts/rust/generate-openapi-bindings.sh
@@ -140,6 +140,11 @@ fi
 # Cleanup the existing autogenerated code
 ( cd "$ROOTDIR"; git clean -f -e "!version.txt" -X "$REAL_TARGET" || true )

+( cd "$tmpd"; mv src/lib.rs src/lib_.rs; )
+if [ ! -f "$TARGET/src/lib.rs" ]; then
+    git restore "$TARGET/src/lib.rs"
+fi
+mv "$TARGET/src/lib.rs" "$tmpd/src/lib.rs"
 mv "$tmpd"/* "$TARGET"/
 rm -rf "$tmpd"

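Note: the generator still writes its output as src/lib.rs inside the temporary tree, so the script renames that to lib_.rs, restores the tracked one-line lib.rs if the preceding git clean removed it, and moves the shim into the temporary tree so the final mv installs both files together.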
14 changes: 11 additions & 3 deletions terraform/cluster/main.tf
@@ -1,7 +1,8 @@
 module "k8s" {
   source = "./mod/k8s"

-  num_nodes = var.num_nodes
+  num_nodes = local.num_nodes
+  master_nodes = local.master_nodes
   ssh_user = local.ssh_user
   private_key_path = local.ssh_key_priv
   node_list = module.provider.node_list
@@ -21,12 +22,17 @@ module "provider" {
   # lxd and libvirt
   ssh_user = local.ssh_user
   ssh_key = local.ssh_key_pub
-  num_nodes = var.num_nodes
+  master_nodes = local.master_nodes
+  worker_nodes = var.worker_nodes
+  num_nodes = local.num_nodes
   worker_memory = var.worker_memory
   worker_vcpu = var.worker_vcpu
   master_memory = var.master_memory
   master_vcpu = var.master_vcpu
-  hostname_formatter = var.hostname_formatter
+  master_fmt = format("%s-%%d", var.master_prefix)
+  worker_fmt = format("%s-%%d", var.worker_prefix)
+  lxc_cached_image = var.lxc_cached_image
+  lxc_image = var.lxc_image

   # libvirt
   image_path = var.image_path
@@ -47,6 +53,8 @@ locals {
   ssh_key_priv = var.ssh_key_priv == "" ? pathexpand("~/.ssh/id_rsa") : var.ssh_key_priv
   ssh_user = var.ssh_user == "" ? data.local_file.current_username.content : var.ssh_user
   qcow2_image = var.qcow2_image == "" ? pathexpand("~/terraform_images/ubuntu-20.04-server-cloudimg-amd64.img") : pathexpand(var.qcow2_image)
+  master_nodes = 1
+  num_nodes = var.worker_nodes + local.master_nodes
 }

 resource "null_resource" "generate_current_username" {
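Note: the cluster size is now derived rather than set directly (one master plus var.worker_nodes workers), and the per-role hostname templates are built by escaping the %d verb. A standalone sketch of both tricks, with hypothetical values:

locals {
  master_nodes = 1
  worker_nodes = 2
  num_nodes    = local.worker_nodes + local.master_nodes # 2 workers + 1 master = 3

  # "%%d" escapes the verb, so the prefix "ksmaster" yields the template "ksmaster-%d",
  master_fmt = format("%s-%%d", "ksmaster")
  # which the modules later fill in with a node index:
  first_master = format(local.master_fmt, 1) # => "ksmaster-1"
}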
2 changes: 2 additions & 0 deletions terraform/cluster/mod/k8s/kubeadm_config.yaml
@@ -38,3 +38,5 @@ apiVersion: kubelet.config.k8s.io/v1beta1
 kind: KubeletConfiguration
 cgroupDriver: "systemd"
 failSwapOn: false
+#cpuManagerPolicy: "static"
+#reservedSystemCPUs: "1"
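Note: the commented-out kubelet options are left as a breadcrumb: cpuManagerPolicy: "static" would give Guaranteed pods exclusive cores, and reservedSystemCPUs would keep CPU 1 back for system daemons. That matters for this repo because io-engine sets its own core affinity (see the comment in the lxd module below).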
3 changes: 2 additions & 1 deletion terraform/cluster/mod/k8s/main.tf
@@ -4,6 +4,7 @@ variable "k8s_cluster_token" {

 variable "overlay_cidr" {}
 variable "num_nodes" {}
+variable "master_nodes" {}

 variable "ssh_user" {}
 variable "private_key_path" {}
@@ -38,7 +39,7 @@ resource "null_resource" "k8s" {

   provisioner "remote-exec" {
     inline = [
-      count.index == 0 ? local.master : local.node
+      count.index < var.master_nodes ? local.master : local.node
     ]
   }
 }
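Note: swapping count.index == 0 for count.index < var.master_nodes generalizes "the first node is the master" into "the first N nodes are masters", so growing the control plane only requires changing the master_nodes local. A toy illustration of the convention (hypothetical, not part of the diff):

locals {
  master_nodes = 1
  roles        = [for i in range(3) : i < local.master_nodes ? "master" : "worker"]
  # => ["master", "worker", "worker"]
}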
24 changes: 15 additions & 9 deletions terraform/cluster/mod/libvirt/main.tf
@@ -2,11 +2,14 @@

 variable "image_path" {}
 variable "num_nodes" {}
+variable "master_nodes" {}
+variable "worker_nodes" {}
 variable "worker_memory" {}
 variable "worker_vcpu" {}
 variable "master_memory" {}
 variable "master_vcpu" {}
-variable "hostname_formatter" {}
+variable "master_fmt" {}
+variable "worker_fmt" {}
 variable "ssh_user" {}
 variable "ssh_key" {}
 variable "private_key_path" {}
@@ -16,6 +19,9 @@ variable "qcow2_image" {}
 variable "network_mode" {}
 variable "bridge_name" {}

+variable "lxc_image" {}
+variable "lxc_cached_image" {}
+
 provider "libvirt" {
   uri = "qemu:///system"
 }
@@ -37,7 +43,7 @@ resource "libvirt_volume" "ubuntu-qcow2" {

 # we want to, based of the first image, create 3 separate images each with their own cloud-init settings
 resource "libvirt_volume" "ubuntu-qcow2-resized" {
-  name = format(var.hostname_formatter, count.index + 1)
+  name = count.index < var.master_nodes ? format(var.master_fmt, count.index + 1) : format(var.worker_fmt, count.index + 1 - var.master_nodes)
   count = var.num_nodes
   base_volume_id = libvirt_volume.ubuntu-qcow2.id
   pool = libvirt_pool.ubuntu-pool.name
@@ -50,15 +56,15 @@ resource "libvirt_volume" "pool-disk" {
   count = var.num_nodes
   pool = libvirt_pool.ubuntu-pool.name
   format = "raw"
-  size = var.pooldisk_size
+  size = count.index < var.master_nodes ? 0 : var.pooldisk_size
 }

 locals {
   # user data that we pass to cloud init that reads variables from variables.tf and
   # passes them to a template file to be filled in
   user_data = [
     for node_index in range(var.num_nodes) : templatefile("${path.module}/cloud_init.tmpl", {
-      ssh_user = var.ssh_user, ssh_key = var.ssh_key, hostname = format(var.hostname_formatter, node_index + 1)
+      ssh_user = var.ssh_user, ssh_key = var.ssh_key, hostname = node_index < var.master_nodes ? format(var.master_fmt, node_index + 1) : format(var.worker_fmt, node_index + 1 - var.master_nodes)
     })
   ]
   # likewise for networking
@@ -123,9 +129,9 @@ resource "libvirt_cloudinit_disk" "commoninit" {
 # create the actual VMs for the cluster
 resource "libvirt_domain" "ubuntu-domain" {
   count = var.num_nodes
-  name = format(var.hostname_formatter, count.index + 1)
-  memory = count.index == 0 ? var.master_memory : var.worker_memory
-  vcpu = count.index == 0 ? var.master_vcpu : var.worker_vcpu
+  name = count.index < var.master_nodes ? format(var.master_fmt, count.index + 1) : format(var.worker_fmt, count.index + 1 - var.master_nodes)
+  memory = count.index < var.master_nodes ? var.master_memory : var.worker_memory
+  vcpu = count.index < var.master_nodes ? var.master_vcpu : var.worker_vcpu
   autostart = true

   cloudinit = libvirt_cloudinit_disk.commoninit[count.index].id
@@ -152,7 +158,7 @@ resource "libvirt_domain" "ubuntu-domain" {

   network_interface {
     network_name = var.network_mode == "default" ? "default" : "k8snet"
-    hostname = format(var.hostname_formatter, count.index + 1)
+    hostname = count.index < var.master_nodes ? format(var.master_fmt, count.index + 1) : format(var.worker_fmt, count.index + 1 - var.master_nodes)
     wait_for_lease = true
   }

@@ -185,7 +191,7 @@ output "ks-cluster-nodes" {
 ${libvirt_domain.ubuntu-domain.0.name} ansible_host=${libvirt_domain.ubuntu-domain.0.network_interface.0.addresses.0} ansible_user=${var.ssh_user} ansible_ssh_private_key_file=${var.private_key_path} ansible_ssh_common_args='-o StrictHostKeyChecking=no'

 [nodes]%{for ip in libvirt_domain.ubuntu-domain.*~}
-%{if ip.name != "${format(var.hostname_formatter, 1)}"}${ip.name} ansible_host=${ip.network_interface.0.addresses.0} ansible_user=${var.ssh_user} ansible_ssh_private_key_file=${var.private_key_path} ansible_ssh_common_args='-o StrictHostKeyChecking=no'%{endif}
+%{if ip.name != "${format(var.master_fmt, 1)}"}${ip.name} ansible_host=${ip.network_interface.0.addresses.0} ansible_user=${var.ssh_user} ansible_ssh_private_key_file=${var.private_key_path} ansible_ssh_common_args='-o StrictHostKeyChecking=no'%{endif}
 %{endfor~}
 EOT
 }
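Note: two behavioural changes ride along with the renaming scheme: master nodes no longer get a data disk (their pool volume is sized to 0, presumably because they don't host storage pools), and worker numbering restarts at 1 instead of continuing after the masters. A sketch of the re-indexing with hypothetical prefixes:

locals {
  master_nodes = 1
  names = [
    for i in range(3) :
    i < local.master_nodes ? format("ksmaster-%d", i + 1) : format("ksworker-%d", i + 1 - local.master_nodes)
  ]
  # => ["ksmaster-1", "ksworker-1", "ksworker-2"]
}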
46 changes: 30 additions & 16 deletions terraform/cluster/mod/lxd/main.tf
@@ -4,13 +4,16 @@ provider "lxd" {
 }

 variable "num_nodes" {}
+variable "master_nodes" {}
+variable "worker_nodes" {}
 variable "worker_memory" {}
 variable "worker_vcpu" {}
 variable "master_memory" {}
 variable "master_vcpu" {}
 variable "ssh_user" {}
 variable "ssh_key" {}
-variable "hostname_formatter" {}
+variable "master_fmt" {}
+variable "worker_fmt" {}
 variable "private_key_path" {}

 variable "pooldisk_size" {}
@@ -20,21 +23,26 @@ variable "image_path" {}
 variable "disk_size" {}
 variable "qcow2_image" {}

+variable "lxc_image" {}
+variable "lxc_cached_image" {}
+
 resource "lxd_cached_image" "ubuntu" {
-  source_remote = "ubuntu"
-  source_image = "jammy/amd64"
+  source_remote = split(":", var.lxc_image)[0]
+  source_image = split(":", var.lxc_image)[1]
+  count = var.lxc_cached_image ? 1 : 0
 }

 locals {
   # user data that we pass to cloud init that reads variables from variables.tf and
   # passes them to a template file to be filled in
   user_data = [
     for node_index in range(var.num_nodes) : templatefile("${path.module}/cloud_init.tmpl", {
-      ssh_user = var.ssh_user, ssh_key = var.ssh_key, hostname = format(var.hostname_formatter, node_index + 1)
+      ssh_user = var.ssh_user, ssh_key = var.ssh_key, hostname = node_index < var.master_nodes ? format(var.master_fmt, node_index + 1) : format(var.worker_fmt, node_index + 1 - var.master_nodes)
     })
   ]
   # likewise for networking
   network_config = templatefile("${path.module}/network_config.cfg", {})
+  use_ipv4 = true
 }

 resource "null_resource" "lxd_init" {
@@ -44,21 +52,26 @@ resource "null_resource" "lxd_init" {
 }

 resource "null_resource" "lxd_stop_force" {
+  triggers = {
+    master_nodes = var.master_nodes
+    master_fmt = var.master_fmt
+    worker_fmt = var.worker_fmt
+  }
   provisioner "local-exec" {
     when = destroy
     # todo: should use hostname_formatter
-    command = format("lxc stop ksnode-%d --force", count.index + 1)
+    command = format("lxc stop %s --force", count.index < self.triggers.master_nodes ? format(self.triggers.master_fmt, count.index + 1) : format(self.triggers.worker_fmt, count.index + 1 - self.triggers.master_nodes))
   }
   count = var.num_nodes
   depends_on = [
-    lxd_container.c8s
+    lxd_instance.c8s
   ]
 }

-resource "lxd_container" "c8s" {
+resource "lxd_instance" "c8s" {
   count = var.num_nodes
-  name = format(var.hostname_formatter, count.index + 1)
-  image = lxd_cached_image.ubuntu.fingerprint
+  name = count.index < var.master_nodes ? format(var.master_fmt, count.index + 1) : format(var.worker_fmt, count.index + 1 - var.master_nodes)
+  image = var.lxc_cached_image ? lxd_cached_image.ubuntu[0].fingerprint : var.lxc_image
   ephemeral = false

   # be careful with raw.lxc it has to be key=value\nkey=value
@@ -74,9 +87,9 @@ resource "lxd_container" "c8s" {
   }

   limits = {
-    memory = format("%dMiB", count.index == 0 ? var.master_memory : var.worker_memory)
+    memory = format("%dMiB", count.index < var.master_nodes ? var.master_memory : var.worker_memory)
     # For the moment this doesn't as io-engine then can't set its core affinity...
-    # cpu = count.index == 0 ? var.master_vcpu : var.worker_vcpu
+    # cpu = count.index < var.master_nodes ? var.master_vcpu : var.worker_vcpu
   }

   device {
@@ -93,7 +106,7 @@ resource "lxd_container" "c8s" {
   connection {
     type = "ssh"
     user = var.ssh_user
-    host = self.ip_address
+    host = local.use_ipv4 ? self.ipv4_address : self.ipv6_address
     private_key = file(var.private_key_path)
   }
 }
@@ -106,22 +119,23 @@
 output "ks-cluster-nodes" {
   value = <<EOT
 [master]
-${lxd_container.c8s.0.name} ansible_host=${lxd_container.c8s.0.ip_address} ansible_user=${var.ssh_user} ansible_ssh_private_key_file=${var.private_key_path} ansible_ssh_common_args='-o StrictHostKeyChecking=no'
+${lxd_instance.c8s.0.name} ansible_host=${local.use_ipv4 ? lxd_instance.c8s.0.ipv4_address : lxd_instance.c8s.0.ipv6_address} ansible_user=${var.ssh_user} ansible_ssh_private_key_file=${var.private_key_path} ansible_ssh_common_args='-o StrictHostKeyChecking=no'

-[nodes]%{for ip in lxd_container.c8s.*~}
-%{if ip.name != "${format(var.hostname_formatter, 1)}"}${ip.name} ansible_host=${ip.ip_address} ansible_user=${var.ssh_user} ansible_ssh_private_key_file=${var.private_key_path} ansible_ssh_common_args='-o StrictHostKeyChecking=no'%{endif}
+[nodes]%{for ip in lxd_instance.c8s.*~}
+%{if ip.name != "${format(var.worker_fmt, 1)}"}${ip.name} ansible_host=${local.use_ipv4 ? ip.ipv4_address : ip.ipv6_address} ansible_user=${var.ssh_user} ansible_ssh_private_key_file=${var.private_key_path} ansible_ssh_common_args='-o StrictHostKeyChecking=no'%{endif}
 %{endfor~}
 EOT
 }

 output "node_list" {
-  value = lxd_container.c8s.*.ip_address
+  value = local.use_ipv4 ? lxd_instance.c8s.*.ipv4_address : lxd_instance.c8s.*.ipv6_address
 }

 terraform {
   required_providers {
     lxd = {
       source = "terraform-lxd/lxd"
+      version = ">= 2.0.0"
     }
   }
 }
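Note: two patterns here deserve a closer look. First, the cached image becomes optional through count = var.lxc_cached_image ? 1 : 0, which is why references to it gain a [0] index. Second, destroy-time provisioners may only reference self, count.index, and each.key, so the values needed at teardown are copied into triggers and read back via self.triggers; the new >= 2.0.0 constraint follows from lxd_instance replacing the provider's older lxd_container resource. A minimal sketch of the destroy-time trigger pattern (hypothetical resource, not the PR's code):

resource "null_resource" "example" {
  count = 2
  triggers = {
    name = format("node-%d", count.index + 1)
  }
  provisioner "local-exec" {
    when = destroy
    # A destroy provisioner cannot read var.* or local.*; anything it needs
    # must be captured in triggers at create time and read via self.triggers.
    command = "echo tearing down ${self.triggers.name}"
  }
}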
35 changes: 25 additions & 10 deletions terraform/cluster/variables.tf
@@ -34,15 +34,19 @@ variable "pooldisk_size" {
   default = 10737418240
 }

-variable "hostname_formatter" {
+variable "master_prefix" {
   type = string
-  default = "ksnode-%d"
+  default = "ksmaster"
 }
+variable "worker_prefix" {
+  type = string
+  default = "ksworker"
+}

-variable "num_nodes" {
+variable "worker_nodes" {
   type = number
-  default = 3
-  description = "The number of nodes to create (should be > 1)"
+  default = 2
+  description = "The number of worker to create"
 }

 variable "network_mode" {
@@ -79,25 +83,25 @@ variable "nr_hugepages" {
 variable "worker_memory" {
   type = number
   default = 6144
-  description = "Amount of memory (MiB) allocated to each worker node - only needed for libvirt provider"
+  description = "Amount of memory (MiB) allocated to each worker node"
 }

 variable "worker_vcpu" {
   type = number
   default = 3
-  description = "Virtual CPUs allocated to each worker node - only needed for libvirt provider"
+  description = "Virtual CPUs allocated to each worker node"
 }

 variable "master_memory" {
   type = number
-  default = 3192
-  description = "Amount of memory (MiB) allocated to the master node - only needed for libvirt provider"
+  default = 2048
+  description = "Amount of memory (MiB) allocated to the master node"
 }

 variable "master_vcpu" {
   type = number
   default = 2
-  description = "Virtual CPUs allocated to the master node - only needed for libvirt provider"
+  description = "Virtual CPUs allocated to the master node"
 }

 variable "kubernetes_version" {
@@ -124,3 +128,14 @@ variable "kubernetes_cni" {
   # default = "https://raw.githubusercontent.com/cloudnativelabs/kube-router/v1.5.4/daemonset/kubeadm-kuberouter.yaml"
   default = "https://raw.githubusercontent.com/flannel-io/flannel/v0.21.5/Documentation/kube-flannel.yml"
 }
+
+variable "lxc_image" {
+  type = string
+  description = "Image in the format: [<remote>:]<image>. Examples: images:busybox/1.36.1 ubuntu:22.04"
+  default = "ubuntu:22.04"
+}
+variable "lxc_cached_image" {
+  type = bool
+  description = "Use a terraform cached image"
+  default = false
+}