Skip to content

Commit

Permalink
add memory-exhaustion test
Browse files Browse the repository at this point in the history
Signed-off-by: Douglas Schilling Landgraf <[email protected]>
  • Loading branch information
dougsland committed Jul 28, 2024
1 parent c9dab15 commit 4ad799f
Show file tree
Hide file tree
Showing 8 changed files with 333 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
vmstat.log
memory-exhaustion-script
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ SCRIPTS = cgroup \
engine \
network \
engine-operations \
disk-exhaustion \
disk-exhaustion/disk-exhaustion \
memory-exhaustion/memory-exhaustion \
processes \
volume \
stress \
Expand Down Expand Up @@ -52,7 +53,10 @@ install: installdeps
@if ! grep -q '^SHARE_DIR=$(SHAREDIR)' $(DESTDIR)$(CONFIGDIR)/$(CONFIG_FILE); then \
echo 'SHARE_DIR=$(SHAREDIR)' >> $(DESTDIR)$(CONFIGDIR)/$(CONFIG_FILE); \
fi
@echo "Installation complete."
@echo "Installation complete via PREFIX: $(PREFIX)"
@echo " - bin: $(DESTDIR)$(BINDIR)$(BIN_FILE)"
@echo " - docs: $(DESTDIR)$(SHAREDIR_DOC)"
@echo " - libs: $(DESTDIR)$(SHAREDIR)"

installdeps:
@if test -x "$(DNF_OR_YUM)"; then rpm -q aardvark-dns > /dev/null 2>&1 || $(DNF_OR_YUM) -y install aardvark-dns; fi
Expand Down
16 changes: 15 additions & 1 deletion constants
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ FEATURE_DISABLED=false

###############################################################################
# Description
# List current state of network, storage and containers. (true or false) Default: false
# Disk exhaustion test options
#DISK_EXHAUSTION="${FEATURE_ENABLED}"
#DISK_EXHAUSTION_THRESHOLD_PERCENT=95 # The threshold percentage for disk usage to trigger additional container creation
#DISK_EXHAUSTION_QUOTA_GB=1 # The size of the disk quota in gigabytes
Expand All @@ -87,6 +87,20 @@ FEATURE_DISABLED=false
#DISK_EXHAUSTION_MOUNT_POINT="/mnt/test_volume" # The mount point inside the container
###############################################################################

###############################################################################
# Description
# Memory exhaustion test options
#MEMORY_EXHAUSTION="${FEATURE_ENABLED}"
#MEMORY_EXHAUSTION_BASE_CONTAINER_NAME="memory_eater_base_container"
#MEMORY_EXHAUSTION_BASE_CONTAINER_MEMORY_LIMIT=512m # Memory limit for the main container
#MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT=30 # PERCENT threshold for memory usage to trigger additional NESTED containers creation, in this case 30%
#MEMORY_EXHAUSTION_BASE_CONTAINER_WAIT_TIME_BEFORE_STOPPING_NESTED_CONTAINERS_IN_SEC=5
#MEMORY_EXHAUSTION_BASE_CONTAINER_SCRIPT_EXAUST_MEMORY=$(pwd)/memory-exhaustion-script
#MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM=1
#MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM_METHOD=write64
#MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT=60s
###############################################################################

###############################################################################
# Description
# List current state of network, storage and containers. (true or false) Default: false
Expand Down
72 changes: 72 additions & 0 deletions demo/run-example-memory-exhaustion
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
# shellcheck disable=SC1091
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Per-user engine-stressor configuration directory and its constants file.
CONFIG_DIR="$HOME/.config/engine-stressor"
CONSTANTS_FILE="$CONFIG_DIR/constants"

# Abort early when the constants file is missing; otherwise load it into
# the current shell.
[ -f "$CONSTANTS_FILE" ] || {
    echo "Error: File $CONSTANTS_FILE does not exist."
    exit 1
}

. "$CONSTANTS_FILE"

# Print a heading followed by the names of the containers reported by
# `sudo podman ps`, one per line, using the "\t - <name>" format template.
podman_ps_by_name() {
    printf '\nContainers running listed by name:\n'
    sudo podman ps --format "\t - {{.Names}}"
}

# network and volume definition to be used in remove and creation
NETNAME="my_network"
VOLNAME="my_volume"
NRM_CONTAINERS=5

echo -e "======================"
echo -e "engine-stressor demo"
echo -e "======================\n"

# Just make sure we are in a fresh env
echo -e "INFO: Cleaning previous engine-stressor container volume settings..."
sudo podman volume rm "${VOLNAME}" --force &> /dev/null

echo -e "INFO: Cleaning previous engine-stressor container network settings..."
sudo podman network rm "${NETNAME}" --force &> /dev/null

echo -e "INFO: Triggering ${NRM_CONTAINERS} containers with engine-stressor..."
# The demo is expected to be started from the demo/ directory; the
# engine-stressor executable lives one level up.
pushd .. 1> /dev/null || exit 1
# starting the engine-stressor
# NOTE: the VAR=value words below are arguments to sudo (not shell
# assignments), so they are subject to word splitting. The $(pwd) expansion
# is therefore quoted to survive working directories containing whitespace.
sudo TOTAL_MEMORY_FOR_THE_NAMESPACE="1G" \
    LIST_CURRENT_STATE=true \
    VERBOSE=true \
    MEMORY_EXHAUSTION=true \
    MEMORY_EXHAUSTION_BASE_CONTAINER_NAME="memory_eater_base_container" \
    MEMORY_EXHAUSTION_BASE_CONTAINER_MEMORY_LIMIT=512m \
    MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT=30 \
    MEMORY_EXHAUSTION_BASE_CONTAINER_WAIT_TIME_BEFORE_STOPPING_NESTED_CONTAINERS_IN_SEC=5 \
    MEMORY_EXHAUSTION_BASE_CONTAINER_SCRIPT_EXAUST_MEMORY="$(pwd)/memory-exhaustion-script" \
    MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM=1 \
    MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM_METHOD=write64 \
    MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT=60s \
    CLEANUP=false \
    TIMEOUT_PODMAN_STOP_CONTAINER=5 \
    NETWORK_NAME="${NETNAME}" \
    VOLUME_NAME="${VOLNAME}" \
    IMAGE_NAME_CONTAINER="quay.io/podman/stable" \
    IMAGE_COMMAND="sleep 3600" \
    NUMBER_OF_CONTAINERS="$NRM_CONTAINERS" \
    ./engine-stressor

# Optional: uncomment to list the containers left running after the demo.
# podman_ps_by_name
popd 1> /dev/null || exit 1
File renamed without changes.
3 changes: 3 additions & 0 deletions engine
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ fi
source "$CONSTANTS_FILE"
source "$SHARE_DIR/cgroup"
source "$SHARE_DIR/disk-exhaustion"
source "$SHARE_DIR/memory-exhaustion"
source "$SHARE_DIR/common"
source "$SHARE_DIR/stress"
source "$SHARE_DIR/systemd"
Expand Down Expand Up @@ -225,6 +226,8 @@ main() {

if [ -n "${DISK_EXHAUSTION}" ]; then
disk_exhaustion_test "disk_exhaustion_test"
elif [ -n "${MEMORY_EXHAUSTION}" ]; then
memory_exhaustion_test
else
# No specific test, just creation of containers
execute_action_in_containers_in_parallel "create_container"
Expand Down
25 changes: 25 additions & 0 deletions memory-exhaustion/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# engine-stressor: memory-exhaustion

## Table of Contents

- [How to monitor the usage of memory my base container is consuming?](#how-to-monitor-the-usage-of-memory-my-base-container-is-consuming)
- [Why not capture the memory consumption running a command inside the container?](#why-not-capture-the-memory-consumption-running-a-command-inside-the-container)
- [Why does increasing the `THRESHOLD_PERCENT` to 90 and having a `MEMORY_LIMIT` of 512 cause the system to get stuck and then proceed after 1 minute?](#why-does-increasing-the-threshold_percent-to-90-and-having-a-memory_limit-of-512-cause-the-system-to-get-stuck-and-then-proceed-after-1-minute)

## How to monitor the usage of memory my base container is consuming?

To monitor the memory usage of the base container during the stress-ng stress test, open a second terminal and run:

```sh
watch sudo podman stats --no-stream --format "{{.MemPerc}}" memory_eater_base_container
```

## Why not capture the memory consumption running a command inside the container?

Running a command such as `ps eo cmd,%mem,%cpu --sort=%mem` inside the container is NOT recommended. It would only show resource usage from the container's perspective, which might not give a complete picture of the container's impact on the host system.

## Why does increasing the `THRESHOLD_PERCENT` to 90 and having a `MEMORY_LIMIT` of 512 cause the system to get stuck and then proceed after 1 minute?

When your system memory resources are low, and the memory usage reaches 90% or more, the operating system and Podman might not be able to allocate the necessary resources to continue tasks. This can prevent actions such as triggering new nested containers or stopping containers until some memory is freed.

For example, once the stress-ng timeout expires (60 seconds, i.e. 1 minute, with `MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT=60s`), stress-ng stops and the system may free up enough memory to allow these tasks to proceed.
211 changes: 211 additions & 0 deletions memory-exhaustion/memory-exhaustion
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
#!/bin/bash

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Start the base container that runs the memory exhaustion script.
#
# Globals read:
#   MEMORY_EXHAUSTION_BASE_CONTAINER_NAME                 container name (replaced if it exists)
#   MEMORY_EXHAUSTION_BASE_CONTAINER_MEMORY_LIMIT         value for podman --memory
#   MEMORY_EXHAUSTION_BASE_CONTAINER_SCRIPT_EXAUST_MEMORY host path of the script to mount
#   IMAGE_NAME_CONTAINER                                  image to run
#   MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT,
#   MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM,
#   MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM_METHOD,
#   MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT    forwarded into the container when set
#
# Exits the shell with status 1 when the container cannot be started.
start_initial_container() {
    local forwarded_env=()
    local var

    echo "INFO: Starting initial container $MEMORY_EXHAUSTION_BASE_CONTAINER_NAME"

    # The mounted script reads its stress-ng settings from its own environment,
    # so forward every setting that is configured on the host.
    for var in \
        MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT \
        MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM \
        MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM_METHOD \
        MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT; do
        if [ -n "${!var:-}" ]; then
            forwarded_env+=(-e "${var}=${!var}")
        fi
    done

    # podman is invoked directly (no string + eval) so configuration values are
    # passed verbatim: no second round of expansion, word splitting or globbing.
    # The ${arr[@]+...} form keeps an empty array safe under `set -u`.
    if sudo podman run -d --privileged \
        --name "$MEMORY_EXHAUSTION_BASE_CONTAINER_NAME" \
        --replace \
        --memory="$MEMORY_EXHAUSTION_BASE_CONTAINER_MEMORY_LIMIT" \
        ${forwarded_env[@]+"${forwarded_env[@]}"} \
        -v "$MEMORY_EXHAUSTION_BASE_CONTAINER_SCRIPT_EXAUST_MEMORY":/memory-exhaustion-script \
        "$IMAGE_NAME_CONTAINER" /bin/sh -c "/memory-exhaustion-script & sleep infinity"; then
        echo "INFO: Started container $MEMORY_EXHAUSTION_BASE_CONTAINER_NAME successfully"
    else
        echo "ERROR: Failed to start container $MEMORY_EXHAUSTION_BASE_CONTAINER_NAME"
        exit 1
    fi
}

# Emit one POSIX sh line that gives NAME the default VALUE unless NAME is
# already set to a non-empty value in the environment of the generated script.
# Arguments:
#   $1: variable name
#   $2: default value
_memory_exhaustion_emit_default() {
    local name=$1 value=$2
    # Escape embedded single quotes ('\'') so the value is embedded literally.
    value=${value//\'/\'\\\'\'}
    printf "[ -n \"\${%s:-}\" ] || %s='%s'\n" "$name" "$name" "$value"
}

# Create the memory exhaustion script that is mounted into the base container.
# Nothing is done when the file already exists, so a user-supplied script is
# never overwritten.
#
# The body of the script is written verbatim (quoted heredoc) and therefore
# expands its variables inside the container, not on the host. The host-side
# settings are written into the script as defaults so they are defined there;
# a value present in the container environment takes precedence.
#
# Globals read:
#   MEMORY_EXHAUSTION_BASE_CONTAINER_SCRIPT_EXAUST_MEMORY   path of the script to create
#   MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT      (default 30)
#   MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM           (default 1)
#   MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM_METHOD    (default write64)
#   MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT      (default 60s)
#
# Exits the shell with status 1 when the script cannot be written.
create_memory_exhaustion_script() {
    local script_path="$MEMORY_EXHAUSTION_BASE_CONTAINER_SCRIPT_EXAUST_MEMORY"

    if [ -f "$script_path" ]; then
        return 0
    fi

    {
        cat << 'EOF'
#!/bin/sh
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# memory-exhaustion-script
# Settings captured when this script was generated; values already present
# in the environment win.
EOF
        _memory_exhaustion_emit_default MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT \
            "${MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT:-30}"
        _memory_exhaustion_emit_default MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM \
            "${MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM:-1}"
        _memory_exhaustion_emit_default MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM_METHOD \
            "${MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM_METHOD:-write64}"
        _memory_exhaustion_emit_default MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT \
            "${MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT:-60s}"
        cat << 'EOF'
# Function to check if a package is installed
is_installed() {
    rpm -q "$1" > /dev/null 2>&1
}
# Function to stress memory
stress_memory() {
    # Get total memory in MB
    total_mem=$(free -m | awk '/Mem:/ {print $2}')
    # Calculate % of total memory
    mem_to_stress=$((total_mem * $MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT / 100))
    # Run stress-ng to stress the defined percentage of total memory
    sudo stress-ng \
        --vm "${MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM}" \
        --vm-bytes "${mem_to_stress}M" \
        --vm-method "${MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_VM_METHOD}" \
        --timeout "${MEMORY_EXHAUSTION_BASE_CONTAINER_STRESS_NG_TIMEOUT}"
}
echo "Memory Threshold is set to $MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT%..."
# Check and install procps-ng if not installed
if ! is_installed procps-ng; then
    echo "procps-ng is not installed. Installing..."
    sudo dnf install -y procps-ng
fi
# Check and install stress-ng if not installed
if ! is_installed stress-ng; then
    echo "stress-ng is not installed. Installing..."
    sudo dnf install -y stress-ng
fi
echo "Starting the stress process..."
# Stress memory
stress_memory
# Keep the script running
wait
EOF
    } > "$script_path" || {
        echo "ERROR: Failed to write memory exhaustion script at $script_path"
        exit 1
    }

    chmod +x "$script_path"
    echo "INFO: Created memory exhaustion script at $script_path"
}

# Print the memory usage of a container as a percentage, without the
# trailing '%' sign (for example "12.34").
# Arguments:
#   container_name: The name of the container
# Output:
#   Whatever `podman stats --format "{{.MemPerc}}"` reports, minus the first
#   '%' on each line. Callers should validate the value before using it in
#   arithmetic, since nothing is printed when podman fails.
get_memory_usage() {
    local container_name=$1
    # The container is created with `sudo podman`, so it must be queried
    # through sudo as well to address the same container store.
    sudo podman stats \
        --no-stream \
        --format "{{.MemPerc}}" \
        "$container_name" | sed 's/%//'
}

# Launch NUMBER_OF_CONTAINERS nested containers inside an already running
# container. Each nested container is named "<container>_nested_<index>",
# uses IMAGE_NAME_CONTAINER and runs `sleep infinity`.
# Arguments:
#   $1: name of the container that hosts the nested containers
# On the first nested container that fails to start, handle_error is invoked.
create_additional_containers() {
    local base=$1
    local idx nested nested_cmd

    echo "INFO: Creating additional containers inside $base"

    for idx in $(seq 1 "$NUMBER_OF_CONTAINERS"); do
        nested="${base}_nested_${idx}"
        nested_cmd="podman run -d --name \"$nested\" --privileged $IMAGE_NAME_CONTAINER sleep infinity"

        # Announce what is about to run, including the exact nested command.
        printf 'INFO: %s\n' \
            "Running nested container $nested inside $base with image $IMAGE_NAME_CONTAINER" \
            "$nested_cmd"

        # The command string is interpreted by a shell inside the host container.
        sudo podman exec "$base" sh -c "$nested_cmd" || {
            echo "ERROR: Failed to run nested container $nested inside $base"
            handle_error
        }

        echo "INFO: Started nested container $nested inside $base"
    done
}

# Stop and remove every nested container
# ("<MEMORY_EXHAUSTION_BASE_CONTAINER_NAME>_nested_<1..NUMBER_OF_CONTAINERS>")
# living inside the base container.
#
# All containers are attempted even when one of them fails, so a single
# failure does not leave the remaining nested containers behind. If any
# container could not be stopped and removed, the shell exits with status 1
# after the loop.
cleanup_nested_containers() {
    local base="$MEMORY_EXHAUSTION_BASE_CONTAINER_NAME"
    local i nested
    local failed=0

    echo "INFO: Cleaning up nested containers"
    for i in $(seq 1 "$NUMBER_OF_CONTAINERS"); do
        nested="${base}_nested_$i"
        # sudo matches how the base container is created (`sudo podman run`).
        if sudo podman exec "$base" podman stop "$nested" \
            && sudo podman exec "$base" podman rm "$nested" --force; then
            echo "INFO: Nested container $nested stopped and removed successfully"
        else
            echo "ERROR: Failed to stop and remove nested container $nested"
            failed=1
        fi
    done

    if [ "$failed" -ne 0 ]; then
        exit 1
    fi
}

# Stop and then remove the base container named by
# MEMORY_EXHAUSTION_BASE_CONTAINER_NAME.
# Exits the shell with status 1 when either step fails.
cleanup_initial_container() {
    local base="$MEMORY_EXHAUSTION_BASE_CONTAINER_NAME"

    echo "INFO: Cleaning up container $base"

    # podman is called directly (no command string + eval) and through sudo,
    # matching how the container is created (`sudo podman run`).
    if sudo podman stop "$base"; then
        echo "INFO: Container $base stopped successfully"
    else
        echo "ERROR: Failed to stop container $base"
        exit 1
    fi

    if sudo podman rm "$base" --force; then
        echo "INFO: Container $base removed successfully"
    else
        echo "ERROR: Failed to remove container $base"
        exit 1
    fi
}

# Clean up everything (nested containers first, then the base container) and
# exit the shell with status 1.
#
# Each cleanup step is run in a subshell: the cleanup functions call `exit`
# when they fail, and without the subshell a failure while removing nested
# containers (e.g. some were never created) would terminate the shell before
# the base container is cleaned up.
handle_error() {
    ( cleanup_nested_containers ) \
        || echo "WARN: Cleanup of nested containers did not complete"
    ( cleanup_initial_container ) \
        || echo "WARN: Cleanup of the initial container did not complete"
    exit 1
}

# Run the memory exhaustion test:
#   1. create the exhaustion script (if missing) and start the base container;
#   2. poll the base container's memory usage once per second;
#   3. once usage reaches MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT,
#      start the nested containers, wait
#      MEMORY_EXHAUSTION_BASE_CONTAINER_WAIT_TIME_BEFORE_STOPPING_NESTED_CONTAINERS_IN_SEC
#      seconds, clean everything up and exit 0.
#
# Exits the shell: 0 on success, 1 on an invalid threshold or on failure of
# any step (via the helpers / handle_error).
memory_exhaustion_test() {
    local base="$MEMORY_EXHAUSTION_BASE_CONTAINER_NAME"
    local threshold="$MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT"
    local number_re='^[0-9]+([.][0-9]+)?$'
    local current_usage

    # Fail fast on a missing/non-numeric threshold; otherwise every loop
    # iteration below would only produce bc and test errors forever.
    if ! [[ "$threshold" =~ $number_re ]]; then
        echo "ERROR: MEMORY_EXHAUSTION_BASE_CONTAINER_THRESHOLD_PERCENT must be a number (got '$threshold')"
        exit 1
    fi

    # Create the memory exhaustion script
    create_memory_exhaustion_script

    # Start the initial container
    start_initial_container

    # Main monitoring loop
    while true; do
        # Get current memory usage of the initial container
        current_usage=$(get_memory_usage "$base")

        # The reading can be empty or non-numeric when stats are not available;
        # skip such readings instead of feeding them to bc.
        if ! [[ "$current_usage" =~ $number_re ]]; then
            echo "WARN: Could not read memory usage of $base (got '$current_usage'), retrying"
            sleep 1
            continue
        fi
        echo "INFO: Current memory usage of $base: $current_usage%"

        # Check if memory usage exceeds threshold (bc handles the decimals)
        if [ "$(echo "$current_usage >= $threshold" | bc)" -eq 1 ]; then
            echo "INFO: Memory usage of $base exceeded threshold of $threshold%"
            if create_additional_containers "$base"; then
                echo "INFO: All nested containers started successfully"
                sleep "${MEMORY_EXHAUSTION_BASE_CONTAINER_WAIT_TIME_BEFORE_STOPPING_NESTED_CONTAINERS_IN_SEC}"
                cleanup_nested_containers
                cleanup_initial_container
                echo "PASS: Memory exhaustion test completed successfully"
                exit 0
            else
                echo "ERROR: Failed to start nested containers"
                handle_error
            fi
        fi

        # Sleep for a short period before checking memory usage again
        sleep 1
    done
}

# memory_exhaustion_test "memory-exhaustion-test"

0 comments on commit 4ad799f

Please sign in to comment.