-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlvcheck
executable file
·350 lines (293 loc) · 10.3 KB
/
lvcheck
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
#!/bin/bash
#
# lvcheck, version 1.0
# Maintainer: Bryan Kadzban <[email protected]>
# Other credits:
# Concept and original script by Theodore Tso <[email protected]>
# on_ac_power is mostly from Debian's powermgmt-base package
# Lots of help (ideas, initial XFS/JFS support, etc.) from
# Andreas Dilger <[email protected]>
# Better XFS support from Eric Sandeen <[email protected]>
# Released under the GNU General Public License, either version 2 or
# (at your option) any later version.
# Overview:
#
# Run this from cron periodically (e.g. once per week). If the
# machine is on AC power, it will run the checks; otherwise they will
# all be skipped. (If the script can't tell whether the machine is
# on AC power, it will use a setting in the configuration file
# (/etc/lvcheck.conf) to decide whether to continue with the checks,
# or abort.)
#
# The script will then decide which logical volumes are active, and
# can therefore be checked via an LVM snapshot. Each of these LVs
# will be queried to find its last-check day, and if that was more
# than $INTERVAL days ago (where INTERVAL is set in the configuration
# file as well), or if the last-check day can't be determined, then
# the script will take an LVM snapshot of that LV and run fsck on the
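# snapshot. The snapshot will be set to use 1/500 the space of the
# source LV (but never less than $MINSNAP megabytes, and never more
# than the volume group's free space minus $MINFREE). After fsck
# finishes, the snapshot is destroyed.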
# (Snapshots are checked serially.)
#
# Any LV that passes fsck should have its last-check time updated (in
# the real superblock, not the snapshot's superblock); any LV whose
# fsck fails will send an email notification to a configurable user
# ($EMAIL). This $EMAIL setting is optional, but its use is highly
# recommended, since if any LV fails, it will need to be checked
# manually, offline. Relevant messages are also sent to syslog.
# Set default values for configuration params. Changes to these values
# will be overwritten on an upgrade! To change these values, use
# /etc/lvcheck.conf.
# who receives mail when a check fails (empty disables mail)
EMAIL="root"
# minimum number of days between two checks of the same filesystem
INTERVAL="30"
# what to do when the AC state cannot be determined: CONTINUE or ABORT
AC_UNKNOWN='CONTINUE'
# smallest snapshot (in megabytes) worth creating
MINSNAP="256"
# megabytes of volume-group free space that snapshots must leave alone
MINFREE="0"
# send $2 to syslog, with severity $1
# severities are emerg/alert/crit/err/warning/notice/info/debug; the
# deprecated aliases panic/error/warn are handled like emerg/err/warning.
# Arguments: $1 - severity keyword, passed to logger as user.$1
#            $2 - message text
# Returns:   the exit status of logger
function log() {
  local sev="$1"
  local msg="$2"
  # extra logger options; stays empty for low-severity messages
  local -a mirror=()
  # log warning-or-higher messages to stderr as well
  case "$sev" in
    emerg|panic|alert|crit|err|error|warning|warn)
      mirror=(-s)
      ;;
  esac
  logger -t lvcheck "${mirror[@]}" -p "user.${sev}" -- "$msg"
}
# determine whether the machine is on AC power
#
# Arguments (both optional; they let the probe be pointed at another
# directory tree, e.g. for testing):
#   $1 - sysfs power-supply directory (default /sys/class/power_supply)
#   $2 - procfs AC-adapter directory  (default /proc/acpi/ac_adapter)
# Globals:  AC_UNKNOWN (read)
# Returns:  0 if a non-battery supply is online, or the state is unknown
#             and AC_UNKNOWN is CONTINUE
#           1 if every supply with a known state is offline, or the state
#             is unknown and AC_UNKNOWN is ABORT
#           Exits the script with status 1 when the state is unknown and
#           AC_UNKNOWN holds any other value.
function on_ac_power() {
  local sysfs_dir="${1:-/sys/class/power_supply}"
  local procfs_dir="${2:-/proc/acpi/ac_adapter}"
  local any_known=no
  local psu type online ac statefile

  # try sysfs power class first
  if [ -d "$sysfs_dir" ] ; then
    for psu in "$sysfs_dir"/* ; do
      [ -r "${psu}/type" ] || continue
      type="$(cat "${psu}/type")"
      # ignore batteries
      [ "${type}" = "Battery" ] && continue
      # a supply without a readable "online" attribute tells us nothing;
      # skip it instead of letting cat complain on stderr
      [ -r "${psu}/online" ] || continue
      online="$(cat "${psu}/online")"
      [ "${online}" = 1 ] && return 0
      [ "${online}" = 0 ] && any_known=yes
    done
    [ "${any_known}" = "yes" ] && return 1
  fi

  # else fall back to AC adapters in /proc
  if [ -d "$procfs_dir" ] ; then
    for ac in "$procfs_dir"/* ; do
      # the adapter state lives in "state" or, failing that, "status"
      if [ -r "${ac}/state" ] ; then
        statefile="${ac}/state"
      elif [ -r "${ac}/status" ] ; then
        statefile="${ac}/status"
      else
        continue
      fi
      grep -q on-line "$statefile" && return 0
      grep -q off-line "$statefile" && any_known=yes
    done
    [ "${any_known}" = "yes" ] && return 1
  fi

  # nothing conclusive: let the configuration decide
  case "$AC_UNKNOWN" in
    CONTINUE)
      return 0 # assume on AC power
      ;;
    ABORT)
      return 1 # assume on battery
      ;;
    *)
      log "err" "Invalid value for AC_UNKNOWN in the config file"
      exit 1
      ;;
  esac
}
# Ask for a full check of device $1 (filesystem type $2) at the next
# reboot, where the filesystem type offers a way to do so.
function try_force_check() {
  local device="$1"
  local kind="$2"
  if [[ "$kind" == ext[234] ]] ; then
    # sets the mount count to 16000 -- presumably far above any configured
    # maximum mount count, so the boot-time fsck does not skip the device
    tune2fs -C 16000 "$device"
  elif [[ "$kind" == "xfs" ]] ; then
    # XFS does not enforce check intervals; let email suffice.
    :
  else
    log "warning" "Don't know how to force a check on $kind..."
  fi
}
# Record "checked just now" on device $1 (filesystem type $2): reset the
# mount count to 0 and the last-check time to the current time, where the
# filesystem type supports it.
function try_delay_checks() {
  local device="$1" kind="$2"
  case "$kind" in
    xfs)
      # XFS does not enforce check intervals; nothing to delay
      return 0
      ;;
    ext[234])
      tune2fs -C 0 -T now "$device"
      # hand tune2fs's own status back to the caller
      return
      ;;
  esac
  log "warning" "Don't know how to delay checks on $kind..."
}
# print the date that $1 was last checked, in a format that date(1) will
# accept, or "Unknown" if we don't know how to find that date.
# Arguments: $1 - device, $2 - filesystem type
# Outputs:   the last-check date, or the literal word "Unknown" -- also
#            when the superblock could not be read or holds no such field,
#            so that callers never receive an empty string
function try_get_check_date() {
  local dev="$1"
  local fstype="$2"
  local last_checked=
  case "$fstype" in
    ext2|ext3|ext4)
      # errors from dumpe2fs are deliberately discarded: an unreadable
      # superblock simply means the date is unknown
      last_checked="$(dumpe2fs -h "$dev" 2>/dev/null | \
        sed -n -e 's/Last checked:[[:space:]]*//p')"
      ;;
    *)
      # XFS does not save the last-checked date
      # TODO: add support for various other FSes
      ;;
  esac
  printf '%s\n' "${last_checked:-Unknown}"
}
# Filesystem-specific veto: decide whether device $1 (filesystem type $2)
# may be checked at all.  Returns 0 to go ahead, 1 to leave it alone.
function should_still_check() {
  local device="$1"
  local kind="$2"
  # only the ext family has an extra test; everything else passes
  [[ "$kind" == ext[234] ]] || return 0
  if tune2fs -l "$device" | grep -q "Journal device" ; then
    log "warning" "Cowardly refusing to check $device, which has an external journal."
    return 1
  fi
  return 0
}
# check the FS on $1 passively, saving output to $3.
# Arguments: $1 - device to check (the snapshot, not the origin)
#            $2 - filesystem type
#            $3 - file that logsave appends the checker's output to
# Returns:   the exit status of the checker for known filesystem types
#            (0 means the check passed); 0 for types we cannot check
function perform_check() {
  local dev="$1"
  local fstype="$2"
  local tmpfile="$3"
  case "$fstype" in
    ext2|ext3|ext4)
      # first clear the orphaned-inode list, to avoid unnecessary FS changes
      # in the next step (which would cause an "error" exit from e2fsck).
      # -C 0 is present for cases where the script is run interactively
      # (logsave -s strips out the progress bar). ignore the return status
      # of this e2fsck, as it doesn't matter.
      nice logsave -as "${tmpfile}" e2fsck -p -C 0 "$dev"
      # then do the real check; -y is here to give more info on any errors
      # that may be present on the FS, in the log file. the snapshot is
      # writable, so it shouldn't break anything if e2fsck changes it.
      nice logsave -as "${tmpfile}" e2fsck -fy -C 0 "$dev"
      return $?
      ;;
    reiserfs)
      # "Yes" on stdin presumably answers the checker's confirmation prompt
      echo Yes | nice logsave -as "${tmpfile}" fsck.reiserfs --check "$dev"
      # a pipeline's status is that of its last command, i.e. the checker
      # run; pass it on instead of claiming success unconditionally, so
      # that a damaged filesystem is actually reported
      return $?
      ;;
    xfs)
      nice logsave -as "${tmpfile}" xfs_repair -n "$dev"
      return $?
      ;;
    jfs)
      nice logsave -as "${tmpfile}" fsck.jfs -fn "$dev"
      return $?
      ;;
    *)
      log "warning" "Don't know how to check $fstype filesystems passively: assuming OK."
      # "assuming OK" must not depend on whether logging itself worked
      return 0
      ;;
  esac
}
# do everything needed to check and reset dates and counters on /dev/$1/$2.
# Arguments: $1 - volume group name
#            $2 - logical volume name
#            $3 - filesystem type
#            $4 - snapshot size, in megabytes
# Globals:   EMAIL (read)
# Returns:   1 if a stale snapshot could not be removed, or the temporary
#            log file or the snapshot could not be created (no check is
#            run in those cases); otherwise the status of the final
#            lvremove
function check_fs() {
  local vg="$1"
  local lv="$2"
  local fstype="$3"
  local snapsize="$4"
  local origin="/dev/${vg}/${lv}"
  local errlog="/var/log/lvcheck-${vg}@${lv}"
  local snaplvbase="${lv}-lvcheck-temp"
  local snaplv tmpfile lvtemp
  snaplv="${snaplvbase}-$(date +'%Y%m%d')"

  # clean up any left-over snapshot LVs
  for lvtemp in "/dev/${vg}/${snaplvbase}"* ; do
    # an unmatched glob is left as the literal pattern: nothing to do then
    [ -e "$lvtemp" ] || continue
    # Assume the script won't run more than one instance at a time?
    log "warning" "Found stale snapshot $lvtemp: attempting to remove."
    # name the LV in plain VG/LV form
    if ! lvremove -f "${vg}/${lvtemp##*/}" ; then
      log "err" "Could not delete stale snapshot $lvtemp"
      return 1
    fi
  done

  # only now create the log file, so the early return above leaks nothing
  if ! tmpfile="$(mktemp -t lvcheck.log.XXXXXXXXXX)" ; then
    log "err" "Could not create a temporary log file; skipping check of ${origin}"
    return 1
  fi

  # and create this one
  if ! lvcreate -s -L "${snapsize}M" -n "${snaplv}" "${vg}/${lv}" ; then
    # without a snapshot there is nothing to check; do NOT treat this as
    # a failed fsck, which would force a check of the real LV and send a
    # misleading mail
    log "err" "Could not create snapshot ${vg}/${snaplv}; skipping check of ${origin}"
    rm -f "$tmpfile"
    return 1
  fi

  if perform_check "/dev/${vg}/${snaplv}" "${fstype}" "${tmpfile}" ; then
    log "info" "Background scrubbing of /dev/${vg}/${lv} succeeded."
    try_delay_checks "$origin" "$fstype"
  else
    log "err" "Background scrubbing of /dev/${vg}/${lv} failed: run fsck offline soon!"
    try_force_check "$origin" "$fstype"
    if [ -n "$EMAIL" ] ; then
      # EMAIL is left unquoted, as before, so that a whitespace-separated
      # list of recipients keeps working
      mail -s "Fsck of /dev/${vg}/${lv} failed!" $EMAIL < "$tmpfile"
    fi
    # save the log file in /var/log in case mail is disabled
    {
      echo ""
      echo -n " Check on " ; date +'%Y-%m-%d'
      echo "======================="
      cat "$tmpfile"
    } >>"$errlog"
  fi

  rm -f "$tmpfile"
  lvremove -f "${vg}/${snaplv}"
}
# site configuration, when present and readable, replaces the built-in
# defaults set further up
if [ -r /etc/lvcheck.conf ] ; then
  . /etc/lvcheck.conf
fi

# on battery power there is nothing to do: leave quietly
if ! on_ac_power ; then
  exit 0
fi

# snapshots need the device-mapper "snapshot" target; when dmsetup does not
# list it, try loading the module, and give up if that fails as well
/sbin/dmsetup targets | grep -q snapshot || /sbin/modprobe dm-snapshot || {
  echo "Couldn't modprobe dm-snapshot, and no snapshot support exists in" >&2
  echo "device-mapper (according to \"dmsetup targets\"). Bailing." >&2
  exit 1
}
# parse up lvscan output.  The device path is the single-quoted field of
# each line; the column before it may hold "Original" or "Snapshot", so
# the path cannot be picked by whitespace position.  Snapshot volumes
# themselves are left alone.
lvscan 2>&1 | \
awk -F"'" '/ACTIVE/ && !/ACTIVE +Snapshot/ && $2 != "" { print $2 }' | \
while IFS= read -r DEV ; do
  # get the FS type.  TYPE is assigned afresh for every device, so a device
  # on which blkid finds nothing cannot inherit the previous device's type
  TYPE="$(blkid -s TYPE -o value "$DEV")"
  if [ -z "$TYPE" ] ; then
    log "info" "No recognizable filesystem on ${DEV}; skipping"
    continue
  fi

  # see whether this FS needs any extra checks that might disqualify this device
  should_still_check "$DEV" "$TYPE" || continue

  # get the last-check time
  check_date="$(try_get_check_date "$DEV" "$TYPE")"

  # if the date is unknown (or empty), run fsck every time the script runs. sigh.
  if [ -n "$check_date" ] && [ "$check_date" != "Unknown" ] ; then
    # add $INTERVAL days, and throw away the time portion; a date that
    # date(1) cannot parse is treated like an unknown one
    if check_day="$(date --date="$check_date $INTERVAL days" +'%Y%m%d' 2>/dev/null)" ; then
      # get today's date, and skip the check if it's not within the interval
      today="$(date +'%Y%m%d')"
      [ "$check_day" -gt "$today" ] && continue
    fi
  fi

  # get the volume group and logical volume names
  VG="$(lvs --noheadings -o vg_name "$DEV" | tr -d ' ')"
  LV="$(lvs --noheadings -o lv_name "$DEV" | tr -d ' ')"

  # get the free space and LV size (in megs), guess at the snapshot
  # size, and see how much the admin will let us use (keeping MINFREE
  # available).  Lower-case "m" asks lvs for 1024-based units, matching
  # how lvcreate interprets the size it is given later.
  SPACE="$(lvs --noheadings --units m --nosuffix -o vg_free "$DEV" | tr -d ' ')"
  SIZE="$(lvs --noheadings --units m --nosuffix -o lv_size "$DEV" | tr -d ' ')"
  # keep only the integer part
  SPACE="${SPACE%%[.,]*}"
  SIZE="${SIZE%%[.,]*}"
  if ! [[ "$SPACE" =~ ^[0-9]+$ && "$SIZE" =~ ^[0-9]+$ ]] ; then
    log "warning" "Could not determine the size or free space for ${DEV}; skipping"
    continue
  fi
  SNAPSIZE=$(( SIZE / 500 ))
  AVAIL=$(( SPACE - MINFREE ))

  # if we don't even have MINSNAP space available, skip the LV
  if [ "$MINSNAP" -gt "$AVAIL" ] || [ "$AVAIL" -le 0 ] ; then
    log "warning" "Not enough free space on volume group for ${DEV}; skipping"
    continue
  fi

  # make snapshot large enough to handle e.g. journal and other updates
  [ "$SNAPSIZE" -lt "$MINSNAP" ] && SNAPSIZE="$MINSNAP"

  # limit snapshot to available space (VG space minus min-free)
  [ "$SNAPSIZE" -gt "$AVAIL" ] && SNAPSIZE="$AVAIL"

  # don't need to check SNAPSIZE again: MINSNAP <= AVAIL, MINSNAP <= SNAPSIZE,
  # and SNAPSIZE <= AVAIL, combined, means SNAPSIZE must be between MINSNAP
  # and AVAIL, which is what we need -- assuming AVAIL > 0

  # check it
  check_fs "$VG" "$LV" "$TYPE" "$SNAPSIZE"
done