diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem index 3fdd233c7..77d9de1fd 100755 --- a/heartbeat/Filesystem +++ b/heartbeat/Filesystem @@ -269,7 +269,26 @@ currently accessing the mount directory. avoid functions that could potentially block during process detection "false" : Do not kill any processes. -"move" : like "safe", but try to mount --move first + +You may add one or more comma separated modifiers +"move", "[no]xargs", "[no]ps", "kill_one_by_one", +for example: + +"true,xargs,nops": + find processes using system tools, + then kill them "simultaneously" using "xargs kill", + do not bother to show process details. + +"safe,move,xargs,ps": + move the mount first, then + find processes by walking /proc/ "manually", then + use "xargs ps" to show process details before + using "xargs kill" to get rid of them. + +"safe,move,noxargs,ps": + move the mount first, then + find processes by walking /proc/ "manually", then + show process details and kill them individually in a loop. The 'safe' option uses shell logic to walk the /proc// directories for pids using the mount point while the default option uses the @@ -281,8 +300,9 @@ party apps, we likely never win the race and the file system will be kept busy. Which may result in a timeout and stop failure, potentially escalating to hard-reset of this node via fencing. -The 'move' option tries to move the mount point somewhere those "rogue apps" -do not expect it, then proceed to kill current users and attempt to umount. +The 'move' option tries to "mount --move" the mount point somewhere those +"rogue apps" do not expect it, then proceed to kill current users and attempt +to umount. For 'move' to work, you will have to make sure the mount point does not reside under a shared mount, for example by mount -o bind,private /mount /mount @@ -773,22 +793,39 @@ signal_processes() { if [ $nr_pids = 0 ]; then ocf_log info "No processes on $dir were signalled. 
force_unmount is set to '$FORCE_UNMOUNT'" return 1 - elif [ $nr_pids -le 24 ]; then + fi + if $do_xargs_kill; then + if $do_ps_f; then + # echo "$pids" | xargs -r kill -s STOP + echo "sending signal $sig to $nr_pids processes:" + + # According to my man page, 'ps -f "$pid"' might be good enough, + # but needs rather specific formatting of that argument. + # And 'ps -f $pid' might produce too many words. + # Use xargs anyway. + echo "$pids" | xargs ps -f 2>&1 + fi + echo "$pids" | xargs -r kill -s $sig 2>&1 + if [ $nr_pids -gt 24 ]; then + sed_script="11 s/^.*/... and more .../; 12,$(( $nr_pids - 10))d" + pids=$(echo "$pids" | sed -e "$sed_script" | tr '\n' ' ') + fi + echo "sent signals $sig to ${nr_pids} processes ["${pids}"]" + else for pid in $pids; do - ocf_log info "sending signal $sig to: $(ps -f $pid | tail -1)" + if $do_ps_f; then + echo "sending signal $sig to: $(ps -f $pid | tail -1)" + else + echo "sending signal $sig to $pid" + fi kill -s $sig $pid done - else - echo "$pids" | xargs -r kill -s $sig - sed_script="11 s/^.*/... and more .../; 12,$(( $nr_pids - 10))d" - pids=$(echo "$pids" | sed -e "$sed_script" | tr '\n' ' ') - ocf_log info "sent signals $sig to ${nr_pids} processes [${pids}]" - fi + fi | ocf_log_pipe info return 0 } try_umount() { local force_arg="$1" SUB="$2" - $UMOUNT $force_arg "$SUB" + $UMOUNT $force_arg "$SUB" 2>&1 | ocf_log_pipe warn list_mounts | grep "${TAB}${SUB}${TAB}" >/dev/null 2>&1 || { ocf_log info "unmounted $SUB successfully" return $OCF_SUCCESS @@ -1181,10 +1218,67 @@ if [ ! 
# (diff residue: tail of the hunk heading and its first context line)
#	-z "$OCF_RESKEY_options" ]; then
#	fi
FAST_STOP=${OCF_RESKEY_fast_stop:="yes"}

# parse_force_unmount_modifiers:
#	Split $FORCE_UNMOUNT, given as "<mode>[,<modifier>...]", into the
#	plain mode and a set of behaviour flags.  This replaces the former
#	top-level "case $FORCE_UNMOUNT" that only recognized a bare "move"
#	(move_before_umount=true; FORCE_UNMOUNT=safe).
#
# Globals read:
#	FORCE_UNMOUNT
# Globals written:
#	FORCE_UNMOUNT		reduced to the mode: the first field, or
#				"safe" when the first field is "move"
#	move_before_umount	true|false, default false
#	do_xargs_kill		true|false, default false
#	do_ps_f			true|false, default true
#
# Modifiers are applied left to right, so a later modifier overrides an
# earlier one ("move" implies xargs,nops unless followed by noxargs/ps).
# Unknown modifiers are reported with "ocf_log warn" and ignored.
#
# The value is taken apart with parameter expansion rather than by
# setting IFS=',' and using "set --": that way IFS is never changed while
# ocf_log / ocf_is_true run, the value is not subject to globbing, and an
# empty FORCE_UNMOUNT needs no "shift" (which may abort a non-interactive
# POSIX shell when there is nothing to shift).
parse_force_unmount_modifiers()
{
	# keep previous "kill one by one" behavior as default for now
	move_before_umount=false
	do_xargs_kill=false
	do_ps_f=true

	local mode modifiers m

	mode=${FORCE_UNMOUNT%%,*}
	case $FORCE_UNMOUNT in
	*,*)	modifiers=${FORCE_UNMOUNT#*,} ;;
	*)	modifiers="" ;;
	esac

	if [ "$mode" = "move" ]; then
		# legacy spelling: "move" as first word means "safe" and
		# is still processed as a modifier below
		modifiers="move${modifiers:+,$modifiers}"
		mode=safe
	fi
	FORCE_UNMOUNT=$mode

	while [ -n "$modifiers" ]; do
		m=${modifiers%%,*}
		case $modifiers in
		*,*)	modifiers=${modifiers#*,} ;;
		*)	modifiers="" ;;
		esac

		case $m in
		move)
			move_before_umount=true
			do_xargs_kill=true
			do_ps_f=false
			;;
		nomove)
			# in case this becomes the default
			move_before_umount=false
			;;
		ps)
			do_ps_f=true
			;;
		nops)
			do_ps_f=false
			;;
		xargs)
			do_xargs_kill=true
			;;
		noxargs)
			do_xargs_kill=false
			;;
		kill_one_by_one)
			do_xargs_kill=false
			do_ps_f=true
			;;

		# catch typos
		*)
			ocf_log warn "force_unmount: unknown modifier $m ignored"
			;;
		esac
	done

	# catch typos, but stay quiet for values that say "false" on purpose
	# NOTE(review): the list of negative spellings is assumed to mirror
	# the positive ones ocf_is_true accepts -- confirm against ocf_is_true.
	case $FORCE_UNMOUNT in
	safe)
		;;
	false|False|FALSE|no|No|NO|off|Off|OFF|0)
		;;
	*)
		if ! ocf_is_true "$FORCE_UNMOUNT"; then
			ocf_log warn "force_unmount: value '$FORCE_UNMOUNT' interpreted as 'false'"
		fi
		;;
	esac
}

parse_force_unmount_modifiers

OP=$1

# (diff residue: header and first hunk context of the second file)
#	diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
#	index 526be42b6..2d47d0be8 100644
#	--- a/heartbeat/ocf-shellfuncs.in
#	+++ b/heartbeat/ocf-shellfuncs.in
#	@@ -348,6 +348,70 @@ ocf_log() {
#		fi
#	}

# ocf_log_pipe:
#	similar to ocf_log, but read messages from stdin.
#	Prefix each line with HA_LOGTAG and timestamp, and output
#	to terminal / syslog / HA_LOGFILE / HA_DEBUGLOG / stderr
#	as appropriate.
# Arguments: loglevel (crit, err, warn, info)
#
# Linux + systemd specific: could use systemd-cat.
# Could use logger with structured data and message-id and whatnot, if available.
# __ocf_log_pipe_fanout:
#	Private helper of ocf_log_pipe: read stdin line by line, once, and
#	copy each line to every sink ocf_log_pipe selected.
#	Uses these variables of its caller: __OCF_PRIO, datefmt, syslog_prio,
#	logfile, debuglog, to_stderr, and the global HA_LOGTAG.
#	Lines meant for syslog are written to stdout as "PRIO: line";
#	the caller pipes stdout into logger.
__ocf_log_pipe_fanout()
{
	local line stamp

	# "|| [ -n ... ]" also picks up a last line without trailing newline
	while IFS= read -r line || [ -n "$line" ]; do
		# date(1) instead of awk strftime(), which POSIX awk does not have
		stamp=$(date "+${datefmt}")
		if [ -n "$syslog_prio" ]; then
			printf '%s: %s\n' "$__OCF_PRIO" "$line"
		fi
		if [ -n "$logfile" ]; then
			printf '%s%s: %s: %s\n' "$stamp" "$HA_LOGTAG" "$__OCF_PRIO" "$line" >> "$logfile"
		fi
		if [ -n "$debuglog" ]; then
			# "logfile" is "{date}{tag}: ", "debugfile" is "{tag}:\t{date}";
			# the original comment says this is how ocf_log did it.
			printf '%s\t%s%s: %s\n' "$HA_LOGTAG" "$stamp" "$__OCF_PRIO" "$line" >> "$debuglog"
		fi
		if $to_stderr; then
			printf '%s%s: %s: %s\n' "$stamp" "$HA_LOGTAG" "$__OCF_PRIO" "$line" >&2
		fi
	done
	return 0
}

# ocf_log_pipe <loglevel>
#	Log each line of stdin with the given level.
#
# Arguments:
#	loglevel	crit, err, warn, info or debug; any other word is
#			logged to syslog as "notice" and tagged with the
#			upper-cased word ("NOTICE" if the word is empty).
# Sinks:
#	stderr is a terminal:	stderr only
#	otherwise:		syslog if HA_LOGFACILITY is set and not "none",
#				HA_LOGFILE (not for debug), HA_DEBUGLOG (if
#				different from HA_LOGFILE), and stderr when
#				neither HA_LOGFACILITY nor HA_LOGFILE is set.
#	debug messages are dropped (stdin is drained) unless HA_debug is
#	set and not 0.
#
# stdin can be read only once, so all configured sinks are fed from a
# single reader; running one filter per sink would leave every sink after
# the first without input.
# The trailing blank needed after the timestamp is added to a local copy
# of HA_DATEFMT; the caller's HA_DATEFMT is left untouched.
ocf_log_pipe()
{
	local loglevel=$1
	local __OCF_PRIO
	local datefmt syslog_prio logfile debuglog to_stderr

	case "$loglevel" in
		crit)	__OCF_PRIO="CRIT";;
		err)	__OCF_PRIO="ERROR";;
		warn)	__OCF_PRIO="WARNING";;
		info)	__OCF_PRIO="INFO";;
		debug)	__OCF_PRIO="DEBUG";;
		*)	# derive the tag from the word we were given
			__OCF_PRIO=$(printf '%s' "$loglevel" | tr '[:lower:]' '[:upper:]')
			__OCF_PRIO=${__OCF_PRIO:-NOTICE}
			loglevel=notice
			;;
	esac

	if [ "$loglevel" = debug ] ; then
		if [ "$HA_debug" = 0 ] || [ -z "$HA_debug" ]; then
			# Avoid sigpipe, drain to dev null.
			cat > /dev/null
			return
		fi
	fi

	# ensure trailing blank in date format
	case $HA_DATEFMT in
		*" ")	datefmt=$HA_DATEFMT ;;
		*)	datefmt="${HA_DATEFMT} " ;;
	esac

	syslog_prio=""
	logfile=""
	debuglog=""
	to_stderr=false

	if test -t 2 ; then
		to_stderr=true
	else
		# should be already set, but just in case:
		set_logtag

		[ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=""
		if [ -n "$HA_LOGFACILITY" ]; then
			syslog_prio="${HA_LOGFACILITY}.${loglevel}"
		fi
		if [ "$loglevel" != "debug" ] && [ -n "$HA_LOGFILE" ]; then
			logfile=$HA_LOGFILE
		fi
		if [ -n "$HA_DEBUGLOG" ] && [ "$HA_LOGFILE" != "$HA_DEBUGLOG" ]; then
			debuglog=$HA_DEBUGLOG
		fi
		if [ -z "$HA_LOGFACILITY" ] && [ -z "$HA_LOGFILE" ] ; then
			to_stderr=true
		fi
	fi

	if [ -n "$syslog_prio" ]; then
		__ocf_log_pipe_fanout | logger -t "$HA_LOGTAG" -p "$syslog_prio"
	else
		__ocf_log_pipe_fanout
	fi
}

#
# ocf_exit_reason: print exit error string to stderr
# Usage:  Allows the OCF script to provide a string