Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 110 additions & 16 deletions heartbeat/Filesystem
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,26 @@ currently accessing the mount directory.
avoid functions that could potentially block during process
detection
"false" : Do not kill any processes.
"move" : like "safe", but try to mount --move first

You may add one or more comma-separated modifiers
"move", "[no]xargs", "[no]ps", "kill_one_by_one",
for example:

"true,xargs,nops":
find processes using system tools,
then kill them "simultaneously" using "xargs kill",
do not bother to show process details.

"safe,move,xargs,ps":
move the mount first, then
find processes by walking /proc/ "manually", then
use "xargs ps" to show process details before
using "xargs kill" to get rid of them.

"safe,move,noxargs,ps":
move the mount first, then
find processes by walking /proc/ "manually", then
show process details and kill them individually in a loop.

The 'safe' option uses shell logic to walk the /proc/<pid>/ directories
for pids using the mount point while the default option uses the
Expand All @@ -281,8 +300,9 @@ party apps, we likely never win the race and the file system will be kept busy.
Which may result in a timeout and stop failure, potentially escalating to
hard-reset of this node via fencing.

The 'move' option tries to move the mount point somewhere those "rogue apps"
do not expect it, then proceed to kill current users and attempt to umount.
The 'move' option tries to "mount --move" the mount point to somewhere those
"rogue apps" do not expect it, then proceeds to kill current users and attempts
to umount.

For 'move' to work, you will have to make sure the mount point does not reside
under a shared mount, for example by mount -o bind,private /mount /mount
Expand Down Expand Up @@ -773,22 +793,39 @@ signal_processes() {
if [ $nr_pids = 0 ]; then
ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'"
return 1
elif [ $nr_pids -le 24 ]; then
fi
if $do_xargs_kill; then
if $do_ps_f; then
# echo "$pids" | xargs -r kill -s STOP
echo "sending signal $sig to $nr_pids processes:"

# According to my man page, 'ps -f "$pid"' might be good enough,
# but needs rather specific formatting of that argument.
# And 'ps -f $pid' might produce too many words.
# Use xargs anyway.
echo "$pids" | xargs ps -f 2>&1
fi
echo "$pids" | xargs -r kill -s $sig 2>&1
if [ $nr_pids -gt 24 ]; then
sed_script="11 s/^.*/... and more .../; 12,$(( $nr_pids - 10))d"
pids=$(echo "$pids" | sed -e "$sed_script" | tr '\n' ' ')
fi
echo "sent signals $sig to ${nr_pids} processes ["${pids}"]"
else
for pid in $pids; do
ocf_log info "sending signal $sig to: $(ps -f $pid | tail -1)"
if $do_ps_f; then
echo "sending signal $sig to: $(ps -f $pid | tail -1)"
else
echo "sending signal $sig to $pid"
fi
kill -s $sig $pid
done
else
echo "$pids" | xargs -r kill -s $sig
sed_script="11 s/^.*/... and more .../; 12,$(( $nr_pids - 10))d"
pids=$(echo "$pids" | sed -e "$sed_script" | tr '\n' ' ')
ocf_log info "sent signals $sig to ${nr_pids} processes [${pids}]"
fi
fi | ocf_log_pipe info
return 0
}
try_umount() {
local force_arg="$1" SUB="$2"
$UMOUNT $force_arg "$SUB"
$UMOUNT $force_arg "$SUB" 2>&1 | ocf_log_pipe warn
list_mounts | grep "${TAB}${SUB}${TAB}" >/dev/null 2>&1 || {
ocf_log info "unmounted $SUB successfully"
return $OCF_SUCCESS
Expand Down Expand Up @@ -1181,10 +1218,67 @@ if [ ! -z "$OCF_RESKEY_options" ]; then
fi
FAST_STOP=${OCF_RESKEY_fast_stop:="yes"}

case $FORCE_UNMOUNT in
move) move_before_umount=true; FORCE_UNMOUNT=safe ;;
*) move_before_umount=false ;;
esac
# parse_force_unmount_modifiers:
#	Split $FORCE_UNMOUNT ("<value>[,<modifier>...]") into its base value
#	and the behavior switches derived from the modifiers.
# Globals read:    FORCE_UNMOUNT
# Globals written: FORCE_UNMOUNT (reduced to the base value),
#                  move_before_umount, do_xargs_kill, do_ps_f
#                  (each "true" or "false")
# Arguments: none
#
# A first word of "move" is the legacy spelling for "safe,move".
# Modifiers are applied strictly left to right, the last one wins.
#
# NOTE(review): "move" and "kill_one_by_one" are compound modifiers that set
# more than one switch, so e.g. "safe,ps,move" ends up with do_ps_f=false.
# Reviewers asked for one switch per modifier and for not depending on the
# position of the base value; that would change what a plain "move" means,
# so it is left as is here and needs a decision.
parse_force_unmount_modifiers()
{
	# keep previous "kill one by one" behavior as default for now
	move_before_umount=false
	do_xargs_kill=false
	do_ps_f=true

	local IFS=','
	local m

	set -- $FORCE_UNMOUNT
	# The comma is only wanted for the split above.  Drop it again right
	# away, so the helpers called below (which see our local IFS) run with
	# default field splitting.  The caller's IFS is restored on return.
	unset IFS

	case "${1-}" in
	move)
		# Legacy value: behaves like "safe", and "move" stays in the
		# word list so the loop below handles it as a modifier.
		FORCE_UNMOUNT=safe
		;;
	*)
		FORCE_UNMOUNT=${1-}
		# An empty setting yields no words at all; do not shift then.
		if [ $# -gt 0 ]; then
			shift
		fi
		;;
	esac

	for m ; do
		case "$m" in
		move)
			move_before_umount=true
			do_xargs_kill=true
			do_ps_f=false
			;;
		nomove)
			# in case this becomes the default
			move_before_umount=false
			;;
		ps)
			do_ps_f=true
			;;
		nops)
			do_ps_f=false
			;;
		xargs)
			do_xargs_kill=true
			;;
		noxargs)
			do_xargs_kill=false
			;;
		kill_one_by_one)
			do_xargs_kill=false
			do_ps_f=true
			;;

		# catch typos
		*)
			ocf_log warn "force_unmount: unknown modifier $m ignored"
			;;
		esac
	done

	# catch typos in the base value
	case "$FORCE_UNMOUNT" in
	safe)
		;;
	false|FALSE|False|no|NO|off|OFF|0)
		# Deliberately switched off ("false" is a documented value):
		# nothing to warn about.
		;;
	*)
		if ! ocf_is_true "$FORCE_UNMOUNT"; then
			ocf_log warn "force_unmount: value '$FORCE_UNMOUNT' interpreted as 'false'"
		fi
		;;
	esac
}

parse_force_unmount_modifiers


OP=$1

Expand Down
64 changes: 64 additions & 0 deletions heartbeat/ocf-shellfuncs.in
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,70 @@ ocf_log() {
fi
}

# ocf_log_pipe:
#	similar to ocf_log, but read messages from stdin.
#	Prefix each line with HA_LOGTAG and timestamp, and output
#	to terminal / syslog / HA_LOGFILE / HA_DEBUGLOG / stderr
#	as appropriate.
# Arguments: loglevel (crit, err, warn, info, debug)
#	Any other level is logged to syslog as "notice", and the lines are
#	labelled with the upper-cased level name as given.
# Globals read: HA_debug, HA_DATEFMT, HA_LOGTAG, HA_LOGFACILITY,
#	HA_LOGFILE, HA_DEBUGLOG
# Globals written: HA_DATEFMT (a trailing blank is appended if missing),
#	HA_LOGFACILITY ("none" becomes empty); HA_LOGTAG and HA_DATEFMT
#	are exported.
#
# stdin can only be read once, so a single awk process fans every line
# out to all enabled destinations.
#
# Linux + systemd specific: could use systemd-cat.
# Could use logger with structured data and message-id and whatnot, if available.
ocf_log_pipe()
{
	local loglevel="$1"
	local __OCF_PRIO
	local to_syslog="" logfile="" debuglog="" to_stderr=""

	case "$loglevel" in
	crit)	__OCF_PRIO="CRIT";;
	err)	__OCF_PRIO="ERROR";;
	warn)	__OCF_PRIO="WARNING";;
	info)	__OCF_PRIO="INFO";;
	debug)	__OCF_PRIO="DEBUG";;
	*)	# The label has to be derived from the requested level;
		# __OCF_PRIO itself is still empty at this point.
		__OCF_PRIO=`echo "${loglevel}" | tr '[a-z]' '[A-Z]'`
		loglevel=notice
		;;
	esac

	if [ "$loglevel" = debug ] ; then
		if [ "$HA_debug" = 0 ] || [ -z "$HA_debug" ]; then
			# Avoid sigpipe, drain to dev null.
			cat > /dev/null
			return
		fi
	fi

	# ensure trailing blank in date format
	case $HA_DATEFMT in *" ") :;; *) HA_DATEFMT="${HA_DATEFMT} ";; esac
	export HA_LOGTAG HA_DATEFMT __OCF_PRIO

	if test -t 2 ; then
		awk '{ print strftime(ENVIRON["HA_DATEFMT"]) ENVIRON["HA_LOGTAG"] ": " ENVIRON["__OCF_PRIO"] ": " $0 }' >&2
		return
	fi

	# should be already set, but just in case:
	set_logtag

	# Decide on the destinations first ...
	[ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=""
	if [ -n "$HA_LOGFACILITY" ]; then
		to_syslog=yes
	fi
	if [ "$loglevel" != "debug" ] && [ -n "$HA_LOGFILE" ]; then
		logfile=$HA_LOGFILE
	fi
	if [ -n "$HA_DEBUGLOG" ] && [ "$HA_LOGFILE" != "$HA_DEBUGLOG" ]; then
		debuglog=$HA_DEBUGLOG
	fi
	if [ -z "$HA_LOGFACILITY" ] && [ -z "$HA_LOGFILE" ] ; then
		to_stderr=yes
	fi

	# ... then read stdin exactly once.
	# awk appends to the files itself; its stdout carries only the
	# syslog lines, which are handed to logger (or discarded).
	# The settings are passed via the environment rather than -v,
	# so backslashes in file names are not interpreted.
	#
	# I don't know why "logfile" is "{date}{tag}: ",
	# and "debugfile" is "{tag}:\t{date}".
	# But this is how ocf_log did it; behave the same.
	__OCF_PIPE_SYSLOG="$to_syslog" \
	__OCF_PIPE_LOGFILE="$logfile" \
	__OCF_PIPE_DEBUGLOG="$debuglog" \
	__OCF_PIPE_STDERR="$to_stderr" \
	awk '
	BEGIN {
		datefmt  = ENVIRON["HA_DATEFMT"]
		tag      = ENVIRON["HA_LOGTAG"]
		prio     = ENVIRON["__OCF_PRIO"]
		syslog   = ENVIRON["__OCF_PIPE_SYSLOG"]
		logfile  = ENVIRON["__OCF_PIPE_LOGFILE"]
		debuglog = ENVIRON["__OCF_PIPE_DEBUGLOG"]
		errout   = ENVIRON["__OCF_PIPE_STDERR"]
	}
	{
		msg = prio ": " $0
		if (syslog != "")
			print msg
		if (logfile != "") {
			line = strftime(datefmt) tag ": " msg
			print line >> logfile
		}
		if (debuglog != "") {
			line = tag "\t" strftime(datefmt) msg
			print line >> debuglog
		}
		if (errout != "") {
			line = strftime(datefmt) tag ": " msg
			print line | "cat 1>&2"
		}
	}' | {
		if [ -n "$to_syslog" ]; then
			logger -t "$HA_LOGTAG" -p "${HA_LOGFACILITY}.${loglevel}"
		else
			cat > /dev/null
		fi
	}
}

#
# ocf_exit_reason: print exit error string to stderr
# Usage: Allows the OCF script to provide a string
Expand Down