Gitweb:
http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 8dff6d4626831bf941a32ee75e9b802fc51a0e8f
Parent: fbc82625c84ca9dfd99f7d8e3e051c53a63bf523
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Wed Apr 14 17:29:54 2010 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Thu Jul 29 14:06:48 2010 -0400
rgmanager: Kill processes correctly w/ force_unmount
The killMountProcesses function was written about 10 years ago.
It was designed to work with lsof or fuser, and to log messages
for each process killed. This is not a bad idea. The problem
is that parsing the output of either is error-prone,
particularly when mountpoints are similar to other directories
on the system.
A far less error-prone method of cleaning up a mount point is to
use 'fuser -kvm' on it. Not only is this less error-prone, it's
a good bit faster at doing its job than iterating through output
in a shell script.
This patch makes force_unmount very reliable at killing the correct
processes, but we lose the logging functionality. It is a fair
trade-off because there have been several bugs in the
killMountProcesses function over the years which have caused several
problems.
Resolves: bz573705
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
---
rgmanager/src/resources/clusterfs.sh | 113 ++----------------------------
rgmanager/src/resources/fs.sh | 124 ++-------------------------------
rgmanager/src/resources/netfs.sh | 128 ++++------------------------------
3 files changed, 25 insertions(+), 340 deletions(-)
diff --git a/rgmanager/src/resources/clusterfs.sh b/rgmanager/src/resources/clusterfs.sh
index 250978e..6a837b1 100755
--- a/rgmanager/src/resources/clusterfs.sh
+++ b/rgmanager/src/resources/clusterfs.sh
@@ -504,113 +504,6 @@ isAlive()
#
-# killMountProcesses device mount_point
-#
-# Using lsof or fuser try to unmount the mount by killing of the processes
-# that might be keeping it busy.
-#
-killMountProcesses()
-{
- typeset -i ret=$SUCCESS
- typeset have_lsof=""
- typeset have_fuser=""
- typeset try
-
- if [ $# -ne 1 ]; then
- ocf_log err \
- "Usage: killMountProcesses mount_point"
- return $FAIL
- fi
-
- typeset mp=$1
-
- ocf_log notice "Forcefully unmounting $mp"
-
- #
- # Not all distributions have lsof. If not use fuser. If it
- # does, try both.
- #
- file=$(which lsof 2>/dev/null)
- if [ -f "$file" ]; then
- have_lsof=$YES
- fi
-
- file=$(which fuser 2>/dev/null)
- if [ -f "$file" ]; then
- have_fuser=$YES
- fi
-
- if [ -z "$have_lsof" -a -z "$have_fuser" ]; then
- ocf_log warn \
- "Cannot forcefully unmount $mp; cannot find lsof or fuser commands"
- return $FAIL
- fi
-
- for try in 1 2 3; do
- if [ -n "$have_lsof" ]; then
- #
- # Use lsof to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mountpoint $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(lsof -b 2>/dev/null | \
- grep -E "$mp(/.*|)\$" | \
- awk '{print $1,$2,$3}' | \
- sort -u -k 1,3)
- elif [ -n "$have_fuser" ]; then
- #
- # Use fuser to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mount point $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(fuser -vm $mp 2>&1 | \
- grep -v PID | \
- sed 's;^'$mp:';;' | \
- awk '{print $4,$2,$1}' | \
- sort -u -k 1,3)
- fi
- done
-
- return $ret
-}
-
-#
# startFilesystem
#
startFilesystem() {
@@ -891,7 +784,11 @@ stop: Could not match $OCF_RESKEY_device with a real device"
umount_failed=yes
if [ "$force_umount" ]; then
- killMountProcesses $mp
+ if [ $try -eq 1 ]; then
+ fuser -TERM -kvm "$mp"
+ else
+ fuser -kvm "$mp"
+ fi
fi
if [ $try -ge $max_tries ]; then
diff --git a/rgmanager/src/resources/fs.sh b/rgmanager/src/resources/fs.sh
index 900dca5..a2148f4 100755
--- a/rgmanager/src/resources/fs.sh
+++ b/rgmanager/src/resources/fs.sh
@@ -695,115 +695,8 @@ isAlive()
#
-# killMountProcesses mount_point
-#
-# Using lsof or fuser try to unmount the mount by killing of the processes
-# that might be keeping it busy.
-#
-killMountProcesses()
-{
- typeset -i ret=$SUCCESS
- typeset have_lsof=""
- typeset have_fuser=""
- typeset try
-
- if [ $# -ne 1 ]; then
- ocf_log err \
- "Usage: killMountProcesses mount_point"
- return $FAIL
- fi
-
- typeset mp=$1
-
- ocf_log notice "Forcefully unmounting $mp"
-
- #
- # Not all distributions have lsof. If not use fuser. If it
- # does, try both.
- #
- file=$(which lsof 2>/dev/null)
- if [ -f "$file" ]; then
- have_lsof=$YES
- fi
-
- file=$(which fuser 2>/dev/null)
- if [ -f "$file" ]; then
- have_fuser=$YES
- fi
-
- if [ -z "$have_lsof" -a -z "$have_fuser" ]; then
- ocf_log warn \
- "Cannot forcefully unmount $mp; cannot find lsof or fuser commands"
- return $FAIL
- fi
-
- for try in 1 2 3; do
- if [ -n "$have_lsof" ]; then
- #
- # Use lsof to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mountpoint $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(lsof -bn 2>/dev/null | \
- grep -E " $mp(/| |$)" | \
- awk '{print $1,$2,$3}' | \
- sort -u -k 1,3)
- elif [ -n "$have_fuser" ]; then
- #
- # Use fuser to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mount point $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(fuser -vm $mp 2>&1 | \
- grep -v PID | \
- sed 's;^'$mp:';;' | \
- awk '{print $4,$2,$1}' | \
- sort -u -k 1,3)
- fi
- done
-
- return $ret
-}
-
-
-#
-# Enable quotas on the mount point if the user requested them
+# Decide which quota options are enabled and return a string
+# which we can pass to quotaon
#
enable_fs_quotas()
{
@@ -1176,8 +1069,9 @@ stop: Could not match $OCF_RESKEY_device with a real device"
umount_failed=yes
if [ "$force_umount" ]; then
- killMountProcesses $mp
if [ $try -eq 1 ]; then
+ fuser -TERM -kvm "$mp"
+
if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
[ "$OCF_RESKEY_nfslock" = "1" ]; then
ocf_log warning \
@@ -1189,15 +1083,11 @@ stop: Could not match $OCF_RESKEY_device with a real device"
notify_list_store $mp/.clumanager/statd
nfslock_reclaim=1
fi
+ else
+ fuser -kvm "$mp"
fi
fi
- if [ $try -ge $max_tries ]; then
- done=$YES
- else
- sleep $sleep_time
- let try=try+1
- fi
;;
*)
return $FAIL
@@ -1206,7 +1096,7 @@ stop: Could not match $OCF_RESKEY_device with a real device"
if [ $try -ge $max_tries ]; then
done=$YES
- elif [ "$done" -ne "$YES" ]; then
+ elif [ "$done" != "$YES" ]; then
sleep $sleep_time
let try=try+1
fi
diff --git a/rgmanager/src/resources/netfs.sh b/rgmanager/src/resources/netfs.sh
index bd391b6..2cc6863 100755
--- a/rgmanager/src/resources/netfs.sh
+++ b/rgmanager/src/resources/netfs.sh
@@ -359,113 +359,6 @@ isMounted () {
}
#
-# killMountProcesses mount_point
-#
-# Using lsof or fuser try to unmount the mount by killing of the processes
-# that might be keeping it busy.
-#
-killMountProcesses()
-{
- typeset -i ret=$SUCCESS
- typeset have_lsof=""
- typeset have_fuser=""
- typeset try
-
- if [ $# -ne 1 ]; then
- ocf_log err \
- "Usage: killMountProcesses mount_point"
- return $FAIL
- fi
-
- typeset mp=$1
-
- ocf_log notice "Forcefully unmounting $mp"
-
- #
- # Not all distributions have lsof. If not use fuser. If it
- # does, try both.
- #
- file=$(which lsof 2>/dev/null)
- if [ -f "$file" ]; then
- have_lsof=$YES
- fi
-
- file=$(which fuser 2>/dev/null)
- if [ -f "$file" ]; then
- have_fuser=$YES
- fi
-
- if [ -z "$have_lsof" -a -z "$have_fuser" ]; then
- ocf_log warn \
- "Cannot forcefully unmount $mp; cannot find lsof or fuser commands"
- return $FAIL
- fi
-
- for try in 1 2 3; do
- if [ -n "$have_lsof" ]; then
- #
- # Use lsof to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mountpoint $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(lsof -w -bn 2>/dev/null | \
- grep -w -E "$mp(/.*|)\$" | \
- awk '{print $1,$2,$3}' | \
- sort -u -k 1,3)
- elif [ -n "$have_fuser" ]; then
- #
- # Use fuser to free up mount point
- #
- while read command pid user
- do
- if [ -z "$pid" ]; then
- continue
- fi
-
- if [ $try -eq 1 ]; then
- ocf_log warn \
- "killing process $pid ($user $command $mp)"
- elif [ $try -eq 3 ]; then
- ocf_log crit \
- "Could not clean up mount point $mp"
- ret=$FAIL
- fi
-
- if [ $try -gt 1 ]; then
- kill -9 $pid
- else
- kill -TERM $pid
- fi
- done < <(fuser -vm $mp 2>&1 | \
- grep -v PID | \
- sed 's;^'$mp:';;' | \
- awk '{print $4,$2,$1}' | \
- sort -u -k 1,3)
- fi
- done
-
- return $ret
-}
-
-#
# startNFSFilesystem
#
startNFSFilesystem() {
@@ -659,15 +552,20 @@ stopNFSFilesystem() {
umount_failed=yes
- if [ "$force_umount" ]; then
- killMountProcesses $mp
- fi
+ if [ "$force_umount" ]; then
+ if [ $try -eq 1 ]; then
+ fuser -TERM -kvm "$mp"
+ else
+ fuser -kvm "$mp"
+ fi
+ fi
- if [ $try -ge $max_tries ]; then
- done=$YES
- else
- sleep $sleep_time
- let try=try+1
+
+ if [ $try -ge $max_tries ]; then
+ done=$YES
+ else
+ sleep $sleep_time
+ let try=try+1
fi
;;
*)