From de8bc9506033879fcad260352443bcc660e59809 Mon Sep 17 00:00:00 2001
From: Jan Orel
Date: Wed, 7 Oct 2020 10:57:43 +0200
Subject: [PATCH] F #1644: Iprove kvm-ssh live migration (#295) (backport)

---
 src/vmm_mad/remotes/kvm/migrate | 185 +++++++++++++++++++++++++++++++-
 1 file changed, 180 insertions(+), 5 deletions(-)

diff --git a/src/vmm_mad/remotes/kvm/migrate b/src/vmm_mad/remotes/kvm/migrate
index 30a035fc3be..1a4b12d4b40 100755
--- a/src/vmm_mad/remotes/kvm/migrate
+++ b/src/vmm_mad/remotes/kvm/migrate
@@ -19,8 +19,81 @@
 source $(dirname $0)/../../etc/vmm/kvm/kvmrc
 source $(dirname $0)/../../scripts_common.sh
 
-deploy_id=$1
-dest_host=$2
+get_qemu_img_version() { # prints the version as one integer, e.g. 2.10.0 -> 2010000
+    qemu-img --version | head -1 | awk '{print $3}' | \
+        sed -e 's/[^0-9\.]//' | awk -F. '{ printf("%d%03d%03d\n", $1,$2,$3); }'
+}
+
+is_readonly() { # returns 0 when disk file $2 of domain $1 has a readonly node
+    local DOMAIN=$1
+    local DISK=$2
+
+    READ_ONLY=$(awk 'gsub(/[\0]/, x)' \
+        <( virsh --connect $LIBVIRT_URI dumpxml $DOMAIN | \
+            $XPATH --stdin --subtree \
+            "//domain/devices/disk[source/@file='$DISK']/readonly"))
+
+    [ -n "$READ_ONLY" ] # non-empty only if the xpath query matched a readonly node
+}
+
+get_size_and_format_of_disk_img() { # prints "SIZE FORMAT"; symlinks give "unknown network-disk"
+    local QEMU_IMG_PATH="$1"
+    local PARAM="$2"
+
+    if [ -L "$QEMU_IMG_PATH" ]; then
+        # symlink, assume network disk
+        echo unknown network-disk
+        return
+    fi
+
+    IMG_INFO=$(qemu-img info $PARAM "$QEMU_IMG_PATH" --output json)
+
+    if [ -z "$IMG_INFO" ]; then
+        echo "Failed to get image info for $QEMU_IMG_PATH"
+        exit 1
+    fi
+
+    SIZE=$(echo $IMG_INFO | sed -nE 's/^.*virtual-size.: ([0-9]+).*/\1/p')
+    FORMAT=$(echo $IMG_INFO | sed -nE 's/^.*format.: "([a-z0-9]+)".*/\1/p')
+
+    if [ -z "$SIZE" ] || [ -z "$FORMAT" ]; then
+        echo "Failed to get image $QEMU_IMG_PATH size or format"
+        exit 1
+    fi
+
+    echo $SIZE $FORMAT
+}
+
+create_target_disk_img() {
+    local DEST_HOST=$1
+    local QEMU_IMG_PATH="$2"
+    local SIZE="$3"
+
+    ssh_monitor_and_log "$DEST_HOST" \
+        "qemu-img create -f qcow2 '$QEMU_IMG_PATH' '$SIZE'" \
+        "Failed to create new qcow image for $QEMU_IMG_PATH"
+}
+
+STDIN=$(cat -)
+DEPLOY_ID=$1
+DEST_HOST=$2
+DISKS=$(virsh --connect $LIBVIRT_URI domblklist "$DEPLOY_ID" \
+    | tail -n+3 | grep -v "^$" | awk '{print $1 "," $2}')
+
+deploy_id=$DEPLOY_ID dest_host=$DEST_HOST # legacy names, still read by unpatched lines
+unset i j XPATH_ELEMENTS
+while IFS= read -r -d '' element; do
+    XPATH_ELEMENTS[i++]="$element"
+done < <(echo $STDIN| $XPATH \
+                /VMM_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/SHARED \
+                /VMM_DRIVER_ACTION_DATA/DISK_TARGET_PATH)
+
+SHARED="${XPATH_ELEMENTS[j++]}"
+VM_DIR="${XPATH_ELEMENTS[j++]}"
+
+
+# use "force-share" param for qemu >= 2.10
+[ "$(get_qemu_img_version)" -ge 2010000 ] && QEMU_IMG_PARAM="-U"
 
 # migration can't be done with domain snapshots, drop them first
 snaps=$(monitor_and_log \
@@ -33,11 +106,113 @@ for snap in $snaps; do
         "Failed to delete snapshot $snap from $deploy_id"
 done
 
-# do live migration, but cleanup target host in case of error
-virsh --connect $LIBVIRT_URI migrate --live $MIGRATE_OPTIONS $deploy_id $QEMU_PROTOCOL://$dest_host/system
+if [ "$SHARED" = "YES" ]; then
+    virsh --connect $LIBVIRT_URI migrate \
+        --live $MIGRATE_OPTIONS $DEPLOY_ID $QEMU_PROTOCOL://$DEST_HOST/system
+
+    RC=$?
+else
+    if [[ -z "$DISKS" ]]; then
+        error_message "No disks discovered on the VM"
+        exit 1
+    fi
 
-RC=$?
+    ssh_monitor_and_log "$DEST_HOST" "mkdir -p '$VM_DIR'" \
+        "Failed to make remote directory $VM_DIR image"
+
+    MIGRATE_DISKS=""
+
+    for DISK_STR in $DISKS; do
+
+        DISK_DEV=${DISK_STR/,*/}
+        DISK_PATH=${DISK_STR/*,/}
+
+        read -r SIZE FORMAT <<<"$(get_size_and_format_of_disk_img "$DISK_PATH" "$QEMU_IMG_PARAM")"
+
+        if [ "$FORMAT" = "raw" ]; then
+            if ! is_readonly $DEPLOY_ID $DISK_PATH; then
+                RAW_DISKS+=" $DISK_PATH"
+                MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}"
+            fi
+
+            # do initial rsync
+            multiline_exec_and_log "tar -cSf - $DISK_PATH | $SSH $DEST_HOST 'tar xSf - -C / '" \
+                "Failed to rsync disk $DISK_PATH to $DEST_HOST:$DISK_PATH"
+
+        elif [ "$FORMAT" = "qcow2" ]; then
+            create_target_disk_img "$DEST_HOST" "$DISK_PATH" "$SIZE"
+            MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}"
+
+        elif [ "$FORMAT" = "network-disk" ]; then
+            true # skip
+        fi
+
+        # copy disk snapshots
+        if [ -d "${DISK_PATH}.snap" ] || [ -L "${DISK_PATH}.snap" ]; then
+            multiline_exec_and_log "tar -cSf - $DISK_PATH.snap | $SSH $DEST_HOST 'tar xSf - -C / '" \
+                "Failed to rsync disk snapshot ${DISK_PATH}.snap to $DEST_HOST"
+        fi
+
+        # recreate symlinks
+        if [ -L "$DISK_PATH" ]; then
+            LINK_TARGET=$(readlink $DISK_PATH)
+            ssh_exec_and_log "$DEST_HOST" "[ -L \"$DISK_PATH\" ] || ln -s \"$LINK_TARGET\" \"$DISK_PATH\"" \
+                "Failed to create symlink $DISK_PATH -> $LINK_TARGET on $DEST_HOST"
+        fi
+    done
+
+    # freeze/suspend domain and rsync raw disks again
+    if [ -n "$RAW_DISKS" ]; then
+        if virsh --connect $LIBVIRT_URI domfsfreeze $DEPLOY_ID; then
+            # local domfsthaw for the case migration fails
+            trap "virsh --connect $LIBVIRT_URI domfsthaw $DEPLOY_ID" EXIT TERM INT HUP
+            FREEZE="yes"
+        else
+            if virsh --connect $LIBVIRT_URI suspend $DEPLOY_ID; then
+                # local resume for the case migration fails
+                trap "virsh --connect $LIBVIRT_URI resume $DEPLOY_ID" EXIT TERM INT HUP
+                SUSPEND="yes"
+            else
+                error_message "Could not freeze or suspend the domain"
+                exit 1
+            fi
+        fi
+
+        for DISK in $RAW_DISKS; do
+            multiline_exec_and_log "tar -cSf - $DISK | $SSH $DEST_HOST 'tar xSf - -C / '" \
+                "Failed to rsync disk $DISK to $DEST_HOST:$DISK"
+        done
+    fi
+
+    # Enumerate disks to copy
+    if [ -n "$MIGRATE_DISKS" ]; then
+        DISK_OPTS="--copy-storage-all --migrate-disks ${MIGRATE_DISKS}"
+    fi
+
+    virsh --connect $LIBVIRT_URI migrate \
+        --live $MIGRATE_OPTIONS $DEPLOY_ID $QEMU_PROTOCOL://$DEST_HOST/system \
+        $DISK_OPTS
+    RC=$?
+
+    # remote domfsthaw/resume, give it time
+    if [ $RC -eq 0 ]; then
+        if [ "$FREEZE" = "yes" ]; then
+            for I in $(seq 5); do
+                virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system domfsthaw $DEPLOY_ID \
+                    && break
+                sleep 2
+            done
+        elif [ "$SUSPEND" = "yes" ]; then
+            for I in $(seq 5); do
+                virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system resume $DEPLOY_ID \
+                    && break
+                sleep 2
+            done
+        fi
+    fi
+fi
 
+# cleanup target host in case of error
 if [ $RC -ne 0 ]; then
     for CLEAN_OP in destroy undefine; do
         virsh --connect $QEMU_PROTOCOL://$dest_host/system "${CLEAN_OP}" $deploy_id >/dev/null 2>&1