Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add buffersize argument to rdfind (slightly modified version of trollkarlen's MR) #180

Merged
merged 3 commits into from
Feb 1, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ TESTS=testcases/largefilesupport.sh \
testcases/verify_deterministic_operation.sh \
testcases/checksum_options.sh \
testcases/md5collisions.sh \
testcases/sha1collisions.sh
testcases/sha1collisions.sh \
testcases/checksum_buffersize.sh

AUXFILES=testcases/common_funcs.sh \
testcases/md5collisions/letter_of_rec.ps \
Expand Down
45 changes: 45 additions & 0 deletions testcases/checksum_buffersize.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/sh
# Test that selection of buffersizes works as expected.

# Abort at the first failing command, so that a failing setup step or a
# failing bare `[ ... ]` check stops the test with a non-zero status.
set -e
# Load the shared test helpers that live in the same directory as this script.
# NOTE(review): presumably this is what provides dbgecho, reset_teststate and
# $rdfind -- confirm in common_funcs.sh.
. "$(dirname "$0")/common_funcs.sh"

# NOTE(review): helper defined outside this file; presumably it gives the test
# a clean starting state -- confirm in common_funcs.sh.
reset_teststate

# Directory (relative to the current working directory) that holds the files
# rdfind is run on.
TEST_DIR=buffersizes_test
mkdir -p "$TEST_DIR"

# Populate $TEST_DIR with five identical files, a through e, each holding
# 1000000 zero bytes. Existing files with those names are overwritten.
make_test_files() {
  dbgecho "creating test files in $TEST_DIR"
  head -c 1000000 /dev/zero >"$TEST_DIR/a"
  # b..e are plain copies of a, i.e. duplicates of it.
  for duplicate in b c d e; do
    cp "$TEST_DIR/a" "$TEST_DIR/$duplicate"
  done
}

dbgecho "check so all buffersizes behave the same"

# Run rdfind once per buffer size on a fresh set of five identical files and
# verify that the outcome is the same every time: "a" is still there, while
# "b" through "e" are gone. Each bare [ ... ] below is an assertion; the
# script relies on "set -e" to stop at the first one that fails.

# The outer loop only runs once; silence shellcheck's warning about that.
# shellcheck disable=SC2043
for checksumtype in sha256; do
  i=1
  # i is doubled before it is used, so the buffer sizes tested are
  # 2 KiB, 4 KiB, ..., 256 KiB.
  while [ "$i" -le 128 ]; do
    i=$((i * 2))
    buffersize=$((i * 1024))
    make_test_files
    dbgecho "testing buffersize $buffersize"
    dbgecho "testing $checksumtype"
    # $rdfind is left unquoted so that it undergoes word splitting (it may
    # hold more than a bare command name). Fix this properly by making rdfind
    # an array and using "${rdfind[@]}"; that requires bash, not sh.
    # shellcheck disable=SC2086
    $rdfind -buffersize "$buffersize" -checksum "$checksumtype" -deleteduplicates true "$TEST_DIR" >/dev/null
    [ -e "$TEST_DIR/a" ]
    [ ! -e "$TEST_DIR/b" ]
    [ ! -e "$TEST_DIR/c" ]
    [ ! -e "$TEST_DIR/d" ]
    [ ! -e "$TEST_DIR/e" ]
  done
done
48 changes: 48 additions & 0 deletions testcases/checksum_buffersize_speedtest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/sh
# Performance test for checksumming with different buffersizes. Not meant
# to be run for regular testing.

# Abort at the first failing command.
set -e
# Load the shared test helpers that live in the same directory as this script.
# NOTE(review): presumably this is what provides dbgecho, reset_teststate and
# $rdfind -- confirm in common_funcs.sh.
. "$(dirname "$0")/common_funcs.sh"

# NOTE(review): helper defined outside this file; presumably it gives the test
# a clean starting state -- confirm in common_funcs.sh.
reset_teststate

# Directory (relative to the current working directory) that holds both the
# generated input files and the results table.
TEST_DIR=buffersizes_speedtest
mkdir -p "$TEST_DIR"

# Create the input data for the speed test below $TEST_DIR:
#   bigfiles/    five identical files (a-e), each 500 MiB of zero bytes
#   smallfiles/  100000000 zero bytes cut into 1000-byte pieces by split
make_test_files() {
  dbgecho "creating test files in $TEST_DIR/bigfiles"
  mkdir -p "$TEST_DIR/bigfiles"
  head -c $((500 * 1024 * 1024)) /dev/zero >"$TEST_DIR/bigfiles/a"
  for copy in b c d e; do
    cp "$TEST_DIR/bigfiles/a" "$TEST_DIR/bigfiles/$copy"
  done

  dbgecho "creating test files in $TEST_DIR/smallfiles"
  mkdir -p "$TEST_DIR/smallfiles"
  # The subshell keeps the directory change local to this step; split is
  # given no output prefix, so its pieces land in the current directory.
  (
    cd "$TEST_DIR/smallfiles"
    head -c 100000000 /dev/zero | split --bytes=1000
  )
}

dbgecho "run speed test for all checksums and buffersizes"

make_test_files

# Start from an empty results file; /usr/bin/time appends one line per run.
: >"$TEST_DIR/results.tsv"
for filesize in big small; do
  for checksumtype in md5 sha1; do
    # Buffer sizes in kB: 1, 2, 4, ..., 4096.
    i=1
    while [ "$i" -le 4096 ]; do
      dbgecho "testing $checksumtype $i kB buffersize"
      # One tab-separated result line per run: file set, buffer size in kB,
      # checksum type, then time's %e (elapsed real seconds), %M (maximum
      # resident set size in kB) and %C (the command line that was timed).
      # The \t sequences are passed on literally and expanded by time itself.
      #
      # $rdfind is left unquoted so that it undergoes word splitting (it may
      # hold more than a bare command name). Fix this properly by making
      # rdfind an array and using "${rdfind[@]}"; that requires bash, not sh.
      # shellcheck disable=SC2086
      /usr/bin/time --append --output="$TEST_DIR/results.tsv" \
        -f "$filesize\t$i\t$checksumtype\t%e\t%M\t%C" \
        $rdfind -buffersize $((i * 1024)) -checksum "$checksumtype" \
        -dryrun true -deleteduplicates true "$TEST_DIR/${filesize}files" \
        >/dev/null 2>&1
      i=$((i * 2))
    done
  done
done
cat "$TEST_DIR/results.tsv"

Loading