From 3109d38ba6442a028e5399bdfc18fbaeb36d8dc0 Mon Sep 17 00:00:00 2001 From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com> Date: Fri, 7 Feb 2025 16:26:27 +1000 Subject: [PATCH 1/3] Add op monitor to primitives It is no longer applied automatically bsc#1231386 jsc#DOCTEAM-1698 --- xml/article_pacemaker_remote.xml | 3 ++- xml/geo_ip_i.xml | 3 ++- xml/geo_resources_i.xml | 3 ++- xml/ha_configuring_resources.xml | 3 ++- xml/ha_fencing.xml | 10 ++++++---- xml/ha_gfs2.xml | 3 ++- xml/ha_ocfs2.xml | 3 ++- xml/ha_resource_constraints.xml | 23 ++++++++++++++--------- xml/ha_storage_protection.xml | 15 ++++++++++----- 9 files changed, 42 insertions(+), 24 deletions(-) diff --git a/xml/article_pacemaker_remote.xml b/xml/article_pacemaker_remote.xml index c2dbcf712..52d61fc02 100644 --- a/xml/article_pacemaker_remote.xml +++ b/xml/article_pacemaker_remote.xml @@ -743,7 +743,8 @@ ssh: connect to host &node4; port 3121: Connection refused Create a dummy resource: - &prompt.root;crm configure primitive fake1 ocf:pacemaker:Dummy +&prompt.root;crm configure primitive fake1 ocf:pacemaker:Dummy \ + op monitor timeout=20s interval=10s Check the cluster status with the crm status command. 
diff --git a/xml/geo_ip_i.xml b/xml/geo_ip_i.xml index 07a2bdf37..cf6e00053 100644 --- a/xml/geo_ip_i.xml +++ b/xml/geo_ip_i.xml @@ -78,7 +78,8 @@ ip="192.168.3.4" \ keyfile="/etc/whereever/Kgeo-update*.key" \ server="192.168.1.1" \ - serverport="53" + serverport="53" \ + op monitor timeout=30s interval=10s diff --git a/xml/geo_resources_i.xml b/xml/geo_resources_i.xml index 6102f858c..afb4330b2 100644 --- a/xml/geo_resources_i.xml +++ b/xml/geo_resources_i.xml @@ -192,7 +192,8 @@ &prompt.crm.conf;primitive ip-booth ocf:heartbeat:IPaddr2 \ params iflabel="ha" nic="eth1" cidr_netmask="24" \ params rule #cluster-name eq &cluster1; ip="192.168.201.100" \ - params rule #cluster-name eq &cluster2; ip="192.168.202.100" + params rule #cluster-name eq &cluster2; ip="192.168.202.100" \ + op monitor timeout=20s interval=10s &prompt.crm.conf;primitive booth-site ocf:pacemaker:booth-site \ meta resource-stickiness="INFINITY" \ params config="nfs" op monitor interval="10s" diff --git a/xml/ha_configuring_resources.xml b/xml/ha_configuring_resources.xml index ed86971d1..fc43336e0 100644 --- a/xml/ha_configuring_resources.xml +++ b/xml/ha_configuring_resources.xml @@ -864,7 +864,8 @@ Configure the primitive, for example: - &prompt.crm.conf;primitive Apache apache + &prompt.crm.conf;primitive Apache apache \ + op monitor timeout=20s interval=10s diff --git a/xml/ha_fencing.xml b/xml/ha_fencing.xml index ca6174d75..a270dc3d3 100644 --- a/xml/ha_fencing.xml +++ b/xml/ha_fencing.xml @@ -362,11 +362,13 @@ &prompt.root;crm configure &prompt.crm.conf;primitive st-ibmrsa-1 stonith:external/ibmrsa-telnet \ -params nodename=&node1; ip_address=192.168.0.101 \ -username=USERNAME password=PASSW0RD + params nodename=&node1; ip_address=192.168.0.101 \ + username=USERNAME password=PASSW0RD \ + op monitor timeout=20s interval=3600s &prompt.crm.conf;primitive st-ibmrsa-2 stonith:external/ibmrsa-telnet \ -params nodename=&node2; ip_address=192.168.0.102 \ -username=USERNAME password=PASSW0RD + 
params nodename=&node2; ip_address=192.168.0.102 \ + username=USERNAME password=PASSW0RD \ + op monitor timeout=20s interval=3600s &prompt.crm.conf;location l-st-&node1; st-ibmrsa-1 -inf: &node1; &prompt.crm.conf;location l-st-&node2; st-ibmrsa-2 -inf: &node2; &prompt.crm.conf;commit diff --git a/xml/ha_gfs2.xml b/xml/ha_gfs2.xml index e369b16e0..9e2216f9f 100644 --- a/xml/ha_gfs2.xml +++ b/xml/ha_gfs2.xml @@ -143,7 +143,8 @@ Configure external/sbd as the fencing device: &prompt.crm.conf;primitive sbd_stonith stonith:external/sbd \ - params pcmk_delay_max=30 meta target-role="Started" + params pcmk_delay_max=30 meta target-role="Started" \ + op monitor timeout=20s interval=3600s diff --git a/xml/ha_ocfs2.xml b/xml/ha_ocfs2.xml index 142262f76..3c0830a1e 100644 --- a/xml/ha_ocfs2.xml +++ b/xml/ha_ocfs2.xml @@ -255,7 +255,8 @@ Configure external/sbd as the fencing device: &prompt.crm.conf;primitive sbd_stonith stonith:external/sbd \ - params pcmk_delay_max=30 meta target-role="Started" + params pcmk_delay_max=30 meta target-role="Started" \ + op monitor timeout=20s interval=3600s diff --git a/xml/ha_resource_constraints.xml b/xml/ha_resource_constraints.xml index dfcd07a07..77137321b 100644 --- a/xml/ha_resource_constraints.xml +++ b/xml/ha_resource_constraints.xml @@ -282,7 +282,8 @@ Another example is a location with ping: &prompt.crm.conf;primitive ping ping \ - params name=ping dampen=5s multiplier=100 host_list="r1 r2" + params name=ping dampen=5s multiplier=100 host_list="r1 r2" \ + op monitor timeout=60s interval=10s &prompt.crm.conf;clone cl-ping ping meta interleave=true &prompt.crm.conf;location loc-node_pref internal_www \ rule 50: #uname eq &node1; \ @@ -302,8 +303,8 @@ loc-&node1;, referencing the virtual IP addresses vip1 and vip2: -&prompt.crm.conf;primitive vip1 IPaddr2 params ip=&subnetI;.5 -&prompt.crm.conf;primitive vip2 IPaddr2 params ip=&subnetI;.6 +&prompt.crm.conf;primitive vip1 IPaddr2 params ip=&subnetI;.5 op monitor timeout=20s interval=10s 
+&prompt.crm.conf;primitive vip2 IPaddr2 params ip=&subnetI;.6 op monitor timeout=20s interval=10s &prompt.crm.conf;location loc-&node1; { vip1 vip2 } inf: &node1; In some cases it is much more efficient and convenient to use resource @@ -725,8 +726,8 @@ two virtual IPs (vip1 and vip2) on the same node, &node1;: -&prompt.crm.conf;primitive vip1 IPaddr2 params ip=&subnetI;.5 -&prompt.crm.conf;primitive vip2 IPaddr2 params ip=&subnetI;.6 +&prompt.crm.conf;primitive vip1 IPaddr2 params ip=&subnetI;.5 op monitor timeout=20s interval=10s +&prompt.crm.conf;primitive vip2 IPaddr2 params ip=&subnetI;.6 op monitor timeout=20s interval=10s &prompt.crm.conf;location loc-&node1; { vip1 vip2 } inf: &node1; @@ -1534,16 +1535,20 @@ &prompt.crm.conf;node &node3; utilization hv_memory="4000" &prompt.crm.conf;primitive xenA Xen \ utilization hv_memory="3500" meta priority="10" \ - params xmfile="/etc/xen/shared-vm/vm1" + params xmfile="/etc/xen/shared-vm/vm1" \ + op monitor timeout=30s interval=10s &prompt.crm.conf;primitive xenB Xen \ utilization hv_memory="2000" meta priority="1" \ - params xmfile="/etc/xen/shared-vm/vm2" + params xmfile="/etc/xen/shared-vm/vm2" \ + op monitor timeout=30s interval=10s &prompt.crm.conf;primitive xenC Xen \ utilization hv_memory="2000" meta priority="1" \ - params xmfile="/etc/xen/shared-vm/vm3" + params xmfile="/etc/xen/shared-vm/vm3" \ + op monitor timeout=30s interval=10s &prompt.crm.conf;primitive xenD Xen \ utilization hv_memory="1000" meta priority="5" \ - params xmfile="/etc/xen/shared-vm/vm4" + params xmfile="/etc/xen/shared-vm/vm4" \ + op monitor timeout=30s interval=10s &prompt.crm.conf;property placement-strategy="minimal" With all three nodes up, xenA is placed onto a node first, followed diff --git a/xml/ha_storage_protection.xml b/xml/ha_storage_protection.xml index de5a0532a..8da3e411e 100644 --- a/xml/ha_storage_protection.xml +++ b/xml/ha_storage_protection.xml @@ -875,9 +875,11 @@ Received command test from &node2; on disk 
/dev/disk/by-id/DEVICE_I and survives in a split-brain scenario: &prompt.crm.conf;primitive st-sbd-&node1; stonith:external/sbd params \ -pcmk_host_list=&node1; pcmk_delay_base=20 + pcmk_host_list=&node1; pcmk_delay_base=20 \ + op monitor timeout=20s interval=3600s &prompt.crm.conf;primitive st-sbd-&node2; stonith:external/sbd params \ -pcmk_host_list=&node2; pcmk_delay_base=0 + pcmk_host_list=&node2; pcmk_delay_base=0 \ + op monitor timeout=20s interval=3600s @@ -891,7 +893,8 @@ pcmk_host_list=&node2; pcmk_delay_base=0 a unified fencing resource targeting multiple nodes. &prompt.crm.conf;primitive stonith_sbd stonith:external/sbd \ -params pcmk_delay_max=30 + params pcmk_delay_max=30 \ + op monitor timeout=20s interval=3600s <parameter>pcmk_delay_max</parameter> might not prevent double reset in a split-brain scenario @@ -1226,7 +1229,8 @@ Illegal request, Invalid opcode Add a file system primitive for Ext4, using a stable device name for the disk partition: &prompt.crm.conf;primitive ext4 Filesystem \ - params device="/dev/disk/by-id/DEVICE_ID" directory="/mnt/ext4" fstype=ext4 + params device="/dev/disk/by-id/DEVICE_ID" directory="/mnt/ext4" fstype=ext4 \ + op monitor timeout=40s interval=20s Add the following order relationship plus a collocation between the @@ -1633,7 +1637,8 @@ Illegal request, Invalid opcode If you changed diskless SBD to disk-based SBD, you must configure a &stonith; resource for SBD. For example: -&prompt.root;crm configure primitive stonith-sbd stonith:external/sbd +&prompt.root;crm configure primitive stonith-sbd stonith:external/sbd \ + op monitor timeout=20s interval=3600s For more information, see in . 
From 48af9241269016e322e4adf208bf31816e51f5f6 Mon Sep 17 00:00:00 2001 From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com> Date: Wed, 12 Feb 2025 15:26:39 +1000 Subject: [PATCH 2/3] Adjust op monitors based on tech review --- xml/article_pacemaker_remote.xml | 10 +++------- xml/ha_config_cli.xml | 4 ++-- xml/ha_fencing.xml | 6 ++---- xml/ha_gfs2.xml | 3 +-- xml/ha_ocfs2.xml | 3 +-- xml/ha_storage_protection.xml | 16 ++++++---------- 6 files changed, 15 insertions(+), 27 deletions(-) diff --git a/xml/article_pacemaker_remote.xml b/xml/article_pacemaker_remote.xml index 52d61fc02..35022a186 100644 --- a/xml/article_pacemaker_remote.xml +++ b/xml/article_pacemaker_remote.xml @@ -712,13 +712,9 @@ ssh: connect to host &node4; port 3121: Connection refused &prompt.root;crm configure &prompt.crm.conf;primitive vm-&node4; ocf:heartbeat:VirtualDomain \ params hypervisor="qemu:///system" \ - config="/etc/pacemaker/&node4;.xml" \ - meta remote-node=&node4; - - &pace; automatically monitors &pmrm; connections for failure, - so it is not necessary to create a recurring monitor on the - VirtualDomain resource. - + config="/etc/pacemaker/&node4;.xml" \ + meta remote-node=&node4; \ + op monitor timeout=30s interval=10s Enabling live migration diff --git a/xml/ha_config_cli.xml b/xml/ha_config_cli.xml index ffe0550d6..2d5993628 100644 --- a/xml/ha_config_cli.xml +++ b/xml/ha_config_cli.xml @@ -269,9 +269,9 @@ crm cluster join ssh --use-ssh-agent -c USER@NODE1params section if it is the first and only section. 
For example, this line: - &prompt.root;crm primitive ipaddr IPaddr2 params ip=192.168.0.55 + &prompt.root;crm configure primitive ipaddr IPaddr2 params ip=192.168.0.55 is equivalent to this line: - &prompt.root;crm primitive ipaddr IPaddr2 ip=192.168.0.55 + &prompt.root;crm configure primitive ipaddr IPaddr2 ip=192.168.0.55 diff --git a/xml/ha_fencing.xml b/xml/ha_fencing.xml index a270dc3d3..c5222e441 100644 --- a/xml/ha_fencing.xml +++ b/xml/ha_fencing.xml @@ -363,12 +363,10 @@ &prompt.root;crm configure &prompt.crm.conf;primitive st-ibmrsa-1 stonith:external/ibmrsa-telnet \ params nodename=&node1; ip_address=192.168.0.101 \ - username=USERNAME password=PASSW0RD \ - op monitor timeout=20s interval=3600s + username=USERNAME password=PASSW0RD &prompt.crm.conf;primitive st-ibmrsa-2 stonith:external/ibmrsa-telnet \ params nodename=&node2; ip_address=192.168.0.102 \ - username=USERNAME password=PASSW0RD \ - op monitor timeout=20s interval=3600s + username=USERNAME password=PASSW0RD &prompt.crm.conf;location l-st-&node1; st-ibmrsa-1 -inf: &node1; &prompt.crm.conf;location l-st-&node2; st-ibmrsa-2 -inf: &node2; &prompt.crm.conf;commit diff --git a/xml/ha_gfs2.xml b/xml/ha_gfs2.xml index 9e2216f9f..e369b16e0 100644 --- a/xml/ha_gfs2.xml +++ b/xml/ha_gfs2.xml @@ -143,8 +143,7 @@ Configure external/sbd as the fencing device: &prompt.crm.conf;primitive sbd_stonith stonith:external/sbd \ - params pcmk_delay_max=30 meta target-role="Started" \ - op monitor timeout=20s interval=3600s + params pcmk_delay_max=30 meta target-role="Started" diff --git a/xml/ha_ocfs2.xml b/xml/ha_ocfs2.xml index 3c0830a1e..142262f76 100644 --- a/xml/ha_ocfs2.xml +++ b/xml/ha_ocfs2.xml @@ -255,8 +255,7 @@ Configure external/sbd as the fencing device: &prompt.crm.conf;primitive sbd_stonith stonith:external/sbd \ - params pcmk_delay_max=30 meta target-role="Started" \ - op monitor timeout=20s interval=3600s + params pcmk_delay_max=30 meta target-role="Started" diff --git 
a/xml/ha_storage_protection.xml b/xml/ha_storage_protection.xml index 8da3e411e..98f96031b 100644 --- a/xml/ha_storage_protection.xml +++ b/xml/ha_storage_protection.xml @@ -874,12 +874,10 @@ Received command test from &node2; on disk /dev/disk/by-id/DEVICE_I devices. In the following configuration, &node1; wins and survives in a split-brain scenario: -&prompt.crm.conf;primitive st-sbd-&node1; stonith:external/sbd params \ - pcmk_host_list=&node1; pcmk_delay_base=20 \ - op monitor timeout=20s interval=3600s -&prompt.crm.conf;primitive st-sbd-&node2; stonith:external/sbd params \ - pcmk_host_list=&node2; pcmk_delay_base=0 \ - op monitor timeout=20s interval=3600s +&prompt.crm.conf;primitive st-sbd-&node1; stonith:external/sbd \ + params pcmk_host_list=&node1; pcmk_delay_base=20 +&prompt.crm.conf;primitive st-sbd-&node2; stonith:external/sbd \ + params pcmk_host_list=&node2; pcmk_delay_base=0 @@ -893,8 +891,7 @@ Received command test from &node2; on disk /dev/disk/by-id/DEVICE_I a unified fencing resource targeting multiple nodes. &prompt.crm.conf;primitive stonith_sbd stonith:external/sbd \ - params pcmk_delay_max=30 \ - op monitor timeout=20s interval=3600s + params pcmk_delay_max=30 <parameter>pcmk_delay_max</parameter> might not prevent double reset in a split-brain scenario @@ -1637,8 +1634,7 @@ Illegal request, Invalid opcode If you changed diskless SBD to disk-based SBD, you must configure a &stonith; resource for SBD. For example: -&prompt.root;crm configure primitive stonith-sbd stonith:external/sbd \ - op monitor timeout=20s interval=3600s +&prompt.root;crm configure primitive stonith-sbd stonith:external/sbd For more information, see in . 
From 8a3abdee7f16382fe8d911f6d731bfc83226f753 Mon Sep 17 00:00:00 2001 From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com> Date: Wed, 12 Feb 2025 15:41:47 +1000 Subject: [PATCH 3/3] Small edit --- xml/article_pacemaker_remote.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xml/article_pacemaker_remote.xml b/xml/article_pacemaker_remote.xml index 35022a186..49970265f 100644 --- a/xml/article_pacemaker_remote.xml +++ b/xml/article_pacemaker_remote.xml @@ -713,8 +713,12 @@ ssh: connect to host &node4; port 3121: Connection refused &prompt.crm.conf;primitive vm-&node4; ocf:heartbeat:VirtualDomain \ params hypervisor="qemu:///system" \ config="/etc/pacemaker/&node4;.xml" \ - meta remote-node=&node4; \ - op monitor timeout=30s interval=10s + meta remote-node=&node4; + + &pace; automatically monitors &pmrm; connections for failure, + so it is not necessary to create a recurring monitor on the + VirtualDomain resource. + Enabling live migration