Skip to content

Commit

Permalink
Add support for 4k metal images
Browse files Browse the repository at this point in the history
First, add a new `buildextend-metal4k` command to create 4k disk images.
Then, teach `kola` and `cosa run` to read these images.

To test:

    host$ cosa run -I metal4k
    ...
    vm$ sudo fdisk -l /dev/vda
    ...
    Sector size (logical/physical): 4096 bytes / 4096 bytes
    ...

One potentially controversial bit here is that this requires a newer
libguestfs which isn't in f31 yet, so we pull it from f32 for now.

Closes: coreos/fedora-coreos-tracker#385
  • Loading branch information
jlebon committed Mar 12, 2020
1 parent 6dbb099 commit 3e60ef1
Show file tree
Hide file tree
Showing 11 changed files with 108 additions and 46 deletions.
10 changes: 10 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ install_rpms() {
# xargs is part of findutils, which may not be installed
yum -y install /usr/bin/xargs

# We need a newer guestfish which supports 4k drives (XXX: tag into
# continuous tag?)
local libguestfs_koji="https://kojipkgs.fedoraproject.org//packages/libguestfs/1.42.0/1.fc32"
yum -y install \
${libguestfs_koji}/x86_64/libguestfs-1.42.0-1.fc32.x86_64.rpm \
${libguestfs_koji}/x86_64/libguestfs-xfs-1.42.0-1.fc32.x86_64.rpm \
${libguestfs_koji}/x86_64/libguestfs-tools-c-1.42.0-1.fc32.x86_64.rpm \
${libguestfs_koji}/noarch/libguestfs-tools-1.42.0-1.fc32.noarch.rpm \
${libguestfs_koji}/x86_64/perl-Sys-Guestfs-1.42.0-1.fc32.x86_64.rpm

# These are only used to build things in here. Today
# we ship these in the container too to make it easier
# to use the container as a development environment for itself.
Expand Down
1 change: 1 addition & 0 deletions mantle/cmd/kola/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ func init() {
sv(&kola.QEMUOptions.Firmware, "qemu-firmware", "bios", "Boot firmware: bios,uefi,uefi-secure")
sv(&kola.QEMUOptions.DiskImage, "qemu-image", "", "path to CoreOS disk image")
sv(&kola.QEMUOptions.DiskSize, "qemu-size", "", "Resize target disk via qemu-img resize [+]SIZE")
bv(&kola.QEMUOptions.Native4k, "qemu-native-4k", false, "Force 4k sectors for main disk")
bv(&kola.QEMUOptions.Nvme, "qemu-nvme", false, "Use NVMe for main disk")
bv(&kola.QEMUOptions.Swtpm, "qemu-swtpm", true, "Create temporary software TPM")
}
Expand Down
5 changes: 5 additions & 0 deletions mantle/cmd/kola/qemuexec.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,15 @@ func runQemuExec(cmd *cobra.Command, args []string) error {
if kola.QEMUOptions.Nvme {
channel = "nvme"
}
sectorSize := 0
if kola.QEMUOptions.Native4k {
sectorSize = 4096
}
if err = builder.AddPrimaryDisk(&platform.Disk{
BackingFile: kola.QEMUOptions.DiskImage,
Channel: channel,
Size: kola.QEMUOptions.DiskSize,
SectorSize: sectorSize,
}); err != nil {
return errors.Wrapf(err, "adding primary disk")
}
Expand Down
5 changes: 5 additions & 0 deletions mantle/platform/machine/unprivqemu/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,15 @@ func (qc *Cluster) NewMachineWithOptions(userdata *conf.UserData, options platfo
if qc.flight.opts.Nvme {
channel = "nvme"
}
sectorSize := 0
if qc.flight.opts.Native4k {
sectorSize = 4096
}
primaryDisk := platform.Disk{
BackingFile: qc.flight.opts.DiskImage,
Channel: channel,
Size: qc.flight.opts.DiskSize,
SectorSize: sectorSize,
}

if err = builder.AddPrimaryDisk(&primaryDisk); err != nil {
Expand Down
3 changes: 2 additions & 1 deletion mantle/platform/machine/unprivqemu/flight.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ type Options struct {

ForceConfigInjection bool

Nvme bool
Native4k bool
Nvme bool

//Option to create a temporary software TPM - true by default
Swtpm bool
Expand Down
19 changes: 14 additions & 5 deletions mantle/platform/qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type Disk struct {
BackingFile string // raw disk image to use. Incompatible with Size.
Channel string // virtio (default), nvme
DeviceOpts []string // extra options to pass to qemu. "serial=XXXX" makes disks show up as /dev/disk/by-id/virtio-<serial>
SectorSize int // if not 0, override disk sector size
}

type QemuInstance struct {
Expand Down Expand Up @@ -289,11 +290,16 @@ type coreosGuestfish struct {
remote string
}

func newGuestfish(diskImagePath string) (*coreosGuestfish, error) {
func newGuestfish(diskImagePath string, diskSectorSize int) (*coreosGuestfish, error) {
// Set guestfish backend to direct in order to avoid libvirt as backend.
// Using libvirt can lead to permission denied issues if it does not have access
// rights to the qcow image
cmd := exec.Command("guestfish", "--listen", "-a", diskImagePath)
guestfish_args := []string{"--listen"}
if diskSectorSize != 0 {
guestfish_args = append(guestfish_args, fmt.Sprintf("--blocksize=%d", diskSectorSize))
}
guestfish_args = append(guestfish_args, "-a", diskImagePath)
cmd := exec.Command("guestfish", guestfish_args...)
cmd.Env = append(os.Environ(), "LIBGUESTFS_BACKEND=direct")
// make sure it inherits stderr so we see any error message
cmd.Stderr = os.Stderr
Expand Down Expand Up @@ -352,8 +358,8 @@ func (gf *coreosGuestfish) destroy() {

// setupIgnition copies the ignition file inside the disk image and/or sets
// networking kernel arguments
func setupIgnition(confPath string, knetargs string, diskImagePath string) error {
gf, err := newGuestfish(diskImagePath)
func setupIgnition(confPath string, knetargs string, diskImagePath string, diskSectorSize int) error {
gf, err := newGuestfish(diskImagePath, diskSectorSize)
if err != nil {
return err
}
Expand Down Expand Up @@ -438,7 +444,7 @@ func (builder *QemuBuilder) addDiskImpl(disk *Disk, primary bool) error {
// requested, inject via libguestfs on the primary disk.
requiresInjection := builder.Config != "" && (builder.ForceConfigInjection || !builder.supportsFwCfg())
if requiresInjection || builder.IgnitionNetworkKargs != "" {
if err = setupIgnition(builder.Config, builder.IgnitionNetworkKargs, dstFileName); err != nil {
if err = setupIgnition(builder.Config, builder.IgnitionNetworkKargs, dstFileName, disk.SectorSize); err != nil {
return errors.Wrapf(err, "ignition injection with guestfs failed")
}
}
Expand Down Expand Up @@ -466,6 +472,9 @@ func (builder *QemuBuilder) addDiskImpl(disk *Disk, primary bool) error {
if channel == "" {
channel = "virtio"
}
if disk.SectorSize != 0 {
diskOpts = append(diskOpts, fmt.Sprintf("physical_block_size=%[1]d,logical_block_size=%[1]d", disk.SectorSize))
}
builder.addQcow2DiskFd(fd, channel, diskOpts)
return nil
}
Expand Down
42 changes: 28 additions & 14 deletions src/cmd-buildextend-metal
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dn=$(dirname "$0")
# image (qemu). `buildextend-qemu` is a symlink to `buildextend-metal`.
case "$(basename "$0")" in
"cmd-buildextend-metal") image_type=metal;;
"cmd-buildextend-metal4k") image_type=metal4k;;
"cmd-buildextend-dasd") image_type=dasd;;
"cmd-buildextend-qemu") image_type=qemu;;
*) fatal "called as unexpected name $0";;
Expand Down Expand Up @@ -137,9 +138,8 @@ img=${name}-${build}-${image_type}.${basearch}.${image_format}
path=${PWD}/${img}

ignition_platform_id="${image_type}"
# dasd is a different disk format, but it's still metal. Just like
# if in the future we introduce a 4k sector size x86_64 image type (metal-4k).
if [ "${image_type}" = dasd ]; then
# dasd and metal4k are different disk formats, but they're still metal
if [ "${image_type}" = dasd ] || [ "${image_type}" = metal4k ]; then
ignition_platform_id=metal
fi

Expand Down Expand Up @@ -169,17 +169,25 @@ rootfs_size="$(jq '."estimate-mb".final' "$PWD/tmp/ostree-size.json")"
image_size="$(( rootfs_size + 513 ))M"
echo "Disk size estimated to ${image_size}"

disk_args=()

# For bare metal and dasd images, we use the estimated image size. For IaaS/virt, we get it from
# image.yaml because we want a "default" disk size that has some free space.
if [[ "${image_type}" = metal || "${image_type}" = dasd ]]; then
# Unset the root size, which will inherit from the image size
rootfs_size=0
else
image_size="$(python3 -c 'import sys, yaml; print(yaml.safe_load(sys.stdin)["size"])' < "$configdir/image.yaml")G"
rootfs_size="${rootfs_size}M"
fi
# Choose the final image size and root filesystem size based on the image type.
case "${image_type}" in
# The metal* glob matches both `metal` and `metal4k`; these and dasd keep the
# estimated image_size computed above.
metal*|dasd)
# Unset the root size, which will inherit from the image size
rootfs_size=0
;;
qemu)
# Take the disk size from the `size` key of image.yaml (suffixed with G) and
# pin the root filesystem to the estimated size (suffixed with M).
image_size="$(python3 -c 'import sys, yaml; print(yaml.safe_load(sys.stdin)["size"])' < "$configdir/image.yaml")G"
rootfs_size="${rootfs_size}M"
;;
# Any other image_type is treated as a programming error.
*) fatal "unreachable image_type ${image_type}";;
esac

# Extra flags collected for the disk creation step; empty unless the image
# type needs one.
disk_args=()
# metal4k images are built without the x86 BIOS boot partition.
if [ "${image_type}" == metal4k ]; then
disk_args+=("--no-x86-bios-partition")
fi

set -x
# Extract the target kernel config, which may inform how we build disks.
Expand Down Expand Up @@ -225,12 +233,18 @@ ref_arg=${ref}
if [ -n "${ref_is_temp}" ]; then
ref_arg=${commit}
fi

target_drive=("-drive" "if=virtio,id=target,format=${image_format},file=${path}.tmp,cache=unsafe")
if [[ $image_format == raw && $image_type == dasd ]]; then
# we need 4096 block size for ECKD DASD and (obviously) metal4k
if [[ $image_type == dasd || $image_type == metal4k ]]; then
device_type=virtio-blk
if [[ $image_type == dasd ]]; then
device_type=virtio-blk-ccw
fi
target_drive=("-drive" "if=none,id=target,format=${image_format},file=${path}.tmp,cache=unsafe" \
# we need 4096 block size for ECKD DASD
"-device" "virtio-blk-ccw,drive=target,physical_block_size=4096,logical_block_size=4096,scsi=off")
"-device" "${device_type},drive=target,physical_block_size=4096,logical_block_size=4096,scsi=off")
fi

runvm "${target_drive[@]}" -- \
/usr/lib/coreos-assembler/create_disk.sh \
--disk /dev/vda \
Expand Down
1 change: 1 addition & 0 deletions src/cmd-buildextend-metal4k
10 changes: 9 additions & 1 deletion src/cmd-run
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ FIRMWARE=bios
USAGE="Usage: $0 [-d /path/to/disk.qcow2] [--] [qemu options...]
Options:
-b --buildid Target buildid (default latest)
-I --imgtype Target image type (qemu, metal, etc. Default qemu)
-I --imgtype Target image type (qemu, metal, metal4k, etc. Default qemu)
-d DISK Root disk drive (won't be changed by default)
--disk-channel TYPE Communication mechanism for root device: virtio, nvme
-i FILE File containing an Ignition config to merge into the default config
Expand Down Expand Up @@ -259,6 +259,14 @@ case "${DISK_CHANNEL}" in
*) die "Invalid --disk-channel ${DISK_CHANNEL}" ;;
esac

# metal4k images must be attached with 4k sectors, so ask kola to do that.
if [ "${IMAGE_TYPE}" == metal4k ]; then
kola_args+=("--qemu-native-4k")
# native 4k requires a UEFI bootloader
# Only the `bios` value is rewritten; any other firmware value is left as is.
if [ "${FIRMWARE}" == bios ]; then
FIRMWARE=uefi
fi
fi

case "${FIRMWARE}" in
bios) ;;
*) kola_args+=("--qemu-firmware=${FIRMWARE}")
Expand Down
2 changes: 1 addition & 1 deletion src/coreos-assembler
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ cmd=${1:-}
build_commands="init fetch build run prune clean list"
# commands more likely to be used in a prod pipeline only
advanced_build_commands="buildprep buildupload oscontainer"
buildextend_commands="aws azure gcp ibmcloud installer live metal openstack qemu vmware vultr exoscale"
buildextend_commands="aws azure gcp ibmcloud installer live metal metal4k openstack qemu vmware vultr exoscale"
utility_commands="tag sign compress koji-upload kola aws-replicate remote-prune"
other_commands="shell meta"
if [ -z "${cmd}" ]; then
Expand Down
56 changes: 32 additions & 24 deletions src/create_disk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Options:
--rootfs-size: Create the root filesystem with specified size
--boot-verity: Provide this to enable ext4 fs-verity for /boot
--rootfs: xfs|ext4verity|luks
--no-x86-bios-partition: don't create a BIOS partition on x86_64
You probably don't want to run this script by hand. This script is
run as part of 'coreos-assembler build'.
Expand All @@ -39,26 +40,28 @@ EOC
rootfs_size="0"
boot_verity=0
rootfs_type="xfs"
x86_bios_partition=1
extrakargs=""

while [ $# -gt 0 ];
do
flag="${1}"; shift;
case "${flag}" in
--disk) disk="${1}"; shift;;
--buildid) buildid="${1}"; shift;;
--imgid) imgid="${1}"; shift;;
--grub-script) grub_script="${1}"; shift;;
--help) usage; exit;;
--kargs) extrakargs="${extrakargs} ${1}"; shift;;
--osname) os_name="${1}"; shift;;
--ostree-ref) ref="${1}"; shift;;
--ostree-remote) remote_name="${1}"; shift;;
--ostree-repo) ostree="${1}"; shift;;
--save-var-subdirs) save_var_subdirs="${1}"; shift;;
--rootfs-size) rootfs_size="${1}"; shift;;
--boot-verity) boot_verity=1;;
--rootfs) rootfs_type="${1}" shift;;
--disk) disk="${1}"; shift;;
--buildid) buildid="${1}"; shift;;
--imgid) imgid="${1}"; shift;;
--grub-script) grub_script="${1}"; shift;;
--help) usage; exit;;
--kargs) extrakargs="${extrakargs} ${1}"; shift;;
--osname) os_name="${1}"; shift;;
--ostree-ref) ref="${1}"; shift;;
--ostree-remote) remote_name="${1}"; shift;;
--ostree-repo) ostree="${1}"; shift;;
--save-var-subdirs) save_var_subdirs="${1}"; shift;;
--rootfs-size) rootfs_size="${1}"; shift;;
--boot-verity) boot_verity=1;;
# The `;` before `shift` is required: without it the assignment is only a
# prefix of the `shift` builtin and rootfs_type may keep its default value.
--rootfs) rootfs_type="${1}"; shift;;
--no-x86-bios-partition) x86_bios_partition=0;;
*) echo "${flag} is not understood."; usage; exit 10;;
esac;
done
Expand Down Expand Up @@ -99,12 +102,15 @@ if [ "${rootfs_size}" != "0" ]; then
fi
case "$arch" in
x86_64)
sgdisk -Z $disk \
set -- -Z $disk \
-U 00000000-0000-4000-a000-000000000001 \
-n ${BOOTPN}:0:+384M -c ${BOOTPN}:boot \
-n 2:0:+127M -c 2:EFI-SYSTEM -t 2:C12A7328-F81F-11D2-BA4B-00A0C93EC93B \
-n 3:0:+1M -c 3:BIOS-BOOT -t 3:21686148-6449-6E6F-744E-656564454649 \
-n ${ROOTPN}:0:${rootfs_size} -c ${ROOTPN}:root -t ${ROOTPN}:0FC63DAF-8483-4772-8E79-3D69D8477DE4
-n 2:0:+127M -c 2:EFI-SYSTEM -t 2:C12A7328-F81F-11D2-BA4B-00A0C93EC93B
if [ "${x86_bios_partition}" = 1 ]; then
set -- "$@" -n 3:0:+1M -c 3:BIOS-BOOT -t 3:21686148-6449-6E6F-744E-656564454649
fi
set -- "$@" -n ${ROOTPN}:0:${rootfs_size} -c ${ROOTPN}:root -t ${ROOTPN}:0FC63DAF-8483-4772-8E79-3D69D8477DE4
sgdisk "$@"
sgdisk -p "$disk"
EFIPN=2
;;
Expand Down Expand Up @@ -343,12 +349,14 @@ case "$arch" in
x86_64)
# UEFI
install_uefi
# And BIOS grub in addition. See also
# https://github.com/coreos/fedora-coreos-tracker/issues/32
grub2-install \
--target i386-pc \
--boot-directory $rootfs/boot \
$disk
# Install BIOS grub only when x86_bios_partition is 1; it is set to 0 by
# --no-x86-bios-partition, in which case only the UEFI install above is done.
if [ "${x86_bios_partition}" = 1 ]; then
# And BIOS grub in addition. See also
# https://github.com/coreos/fedora-coreos-tracker/issues/32
grub2-install \
--target i386-pc \
--boot-directory $rootfs/boot \
$disk
fi
;;
aarch64)
# Our aarch64 is UEFI only.
Expand Down

0 comments on commit 3e60ef1

Please sign in to comment.