Add dracut-memdebug-ko.sh and install it into the dracut kdump module.
The principle is to use kernel tracing to track buddy page allocation
events during kernel module loading (module_init), so that we can analyze
all the trace data and get the total memory consumption. Large slab
allocations fall through to the buddy allocator, so tracing only
"mm_page_alloc" should be enough for this purpose.
One major drawback of this method is that it consumes a lot of memory; users
should increase the crash kernel memory reservation or the trace buffer size
(via "trace_buf_size=nn[KMG]") as needed.
Signed-off-by: Xunlei Pang <xlpang(a)redhat.com>
---
dracut-memdebug-ko.sh | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++
kexec-tools.spec | 2 +
2 files changed, 119 insertions(+)
create mode 100755 dracut-memdebug-ko.sh
diff --git a/dracut-memdebug-ko.sh b/dracut-memdebug-ko.sh
new file mode 100755
index 0000000..cd22be7
--- /dev/null
+++ b/dracut-memdebug-ko.sh
@@ -0,0 +1,117 @@
+# Try to find out kernel modules with large total memory allocation during loading.
+# For large slab allocation, it will fall into buddy, thus tracing "mm_page_alloc"
+# only(this saves us lots of trace buffer memory) should be enough for the purpose.
+
+parse_trace_data=$1
+TRACE_BASE="/sys/kernel/debug"
+# trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available.
+if [[ -d "/sys/kernel/tracing" ]]; then
+ TRACE_BASE="/sys/kernel"
+fi
+
+if ! [[ "$parse_trace_data" ]]; then
+ # old debugfs case.
+ if ! [[ -d "$TRACE_BASE/tracing" ]]; then
+ mount none -t debugfs $TRACE_BASE
+ # new tracefs case.
+ elif ! [[ -f $TRACE_BASE/tracing/tracing_on ]]; then
+ mount none -t tracefs "$TRACE_BASE/tracing"
+ fi
+
+ if ! [[ -f "$TRACE_BASE/tracing/tracing_on" ]]; then
+ warn "Mount trace failed for kernel module memory analyzing."
+ return 0
+ fi
+
+ # Prepare trace setup.
+ echo 1 > $TRACE_BASE/tracing/events/kmem/mm_page_alloc/enable
+ echo 1 > $TRACE_BASE/tracing/events/module/module_load/enable
+ echo 1 > $TRACE_BASE/tracing/events/module/module_put/enable
+ echo 1 > $TRACE_BASE/tracing/tracing_on
+
+ # 5MB should be big enough for most cases?
+ # Users can override it via "trace_buf_size=nn[KMG]" boot command.
+ cat /proc/cmdline | grep -q "trace_buf_size="
+ if [[ $? -ne 0 ]]; then
+ echo 5120 > $TRACE_BASE/tracing/buffer_size_kb
+ fi
+
+ # Clear trace data
+ echo > $TRACE_BASE/tracing/trace
+ return 0
+fi
+
+# Begin to parse trace data.
+if ! [[ -d "$TRACE_BASE/tracing" ]]; then
+ warn "Can't activate trace, skip kernel module memory analyzing!"
+ return 0
+fi
+
+# Temporarily turn off tracing during copy.
+echo 0 > $TRACE_BASE/tracing/tracing_on
+TMPFILE=/tmp/tmp$$$$
+cp $TRACE_BASE/tracing/trace $TMPFILE -f
+echo 1 > $TRACE_BASE/tracing/tracing_on
+
+# Indexed by task pid.
+declare -A current_module
+
+# Indexed by module name.
+declare -A module_loaded
+declare -A nr_alloc_pages
+
+while read pid cpu flags ts function ;
+do
+ # Skip comment lines
+ if [[ $pid = "#" ]]; then
+ continue
+ fi
+
+ if [[ $function = module_load* ]]; then
+ # One module is being loaded, save the task pid for tracking.
+ module_name=${function#*: }
+ module_names+=" $module_name"
+ current_module[$pid]="$module_name"
+ [[ ${module_loaded[$module_name]} ]] && echo "\"$module_name\" was loaded multiple times!"
+ unset module_loaded[$module_name]
+ nr_alloc_pages[$module_name]=0
+ fi
+
+ if ! [[ ${current_module[$pid]} ]]; then
+ continue
+ fi
+
+ if [[ $function = module_put* ]]; then
+ # Mark the module as loaded
+ module_loaded[${current_module[$pid]}]=1
+ # Module has been loaded when module_put is called, untrack the task
+ unset current_module[$pid]
+ continue
+ fi
+
+ # Once we get here, the task is being tracked(is loading a module).
+ # Get the module name.
+ module_name=${current_module[$pid]}
+
+ if [[ $function = mm_page_alloc* ]]; then
+ order=$(echo $function | sed -e 's/.*order=\([0-9]*\) .*/\1/')
+ nr_alloc_pages[$module_name]=$((${nr_alloc_pages[$module_name]}+$((2 ** $order))))
+ fi
+done < $TMPFILE
+
+echo -e "\n\n== debug_mem for kernel modules during loading begin ==" >&2
+for i in $module_names; do
+ status="load finished"
+ if ! [[ ${module_loaded[$i]} ]]; then
+ status="loading"
+ fi
+ echo -e "${nr_alloc_pages[$i]} pages consumed by \"$i\" [$status]" >&2
+done
+echo -e "== debug_mem for kernel modules during loading end ==\n\n" >&2
+
+unset module_names
+unset module_loaded
+
+rm $TMPFILE -f
+
+return 0
diff --git a/kexec-tools.spec b/kexec-tools.spec
index 1597071..691ad7a 100644
--- a/kexec-tools.spec
+++ b/kexec-tools.spec
@@ -41,6 +41,7 @@ Source103: dracut-kdump-error-handler.sh
Source104: dracut-kdump-emergency.service
Source105: dracut-kdump-error-handler.service
Source106: dracut-kdump-capture.service
+Source107: dracut-memdebug-ko.sh
Requires(post): systemd-units
Requires(preun): systemd-units
@@ -224,6 +225,7 @@ cp %{SOURCE103} $RPM_BUILD_ROOT/etc/kdump-adv-conf/kdump_dracut_modules/99kdumpb
cp %{SOURCE104} $RPM_BUILD_ROOT/etc/kdump-adv-conf/kdump_dracut_modules/99kdumpbase/%{remove_dracut_prefix %{SOURCE104}}
cp %{SOURCE105} $RPM_BUILD_ROOT/etc/kdump-adv-conf/kdump_dracut_modules/99kdumpbase/%{remove_dracut_prefix %{SOURCE105}}
cp %{SOURCE106} $RPM_BUILD_ROOT/etc/kdump-adv-conf/kdump_dracut_modules/99kdumpbase/%{remove_dracut_prefix %{SOURCE106}}
+cp %{SOURCE107} $RPM_BUILD_ROOT/etc/kdump-adv-conf/kdump_dracut_modules/99kdumpbase/%{remove_dracut_prefix %{SOURCE107}}
chmod 755 $RPM_BUILD_ROOT/etc/kdump-adv-conf/kdump_dracut_modules/99kdumpbase/%{remove_dracut_prefix %{SOURCE100}}
chmod 755 $RPM_BUILD_ROOT/etc/kdump-adv-conf/kdump_dracut_modules/99kdumpbase/%{remove_dracut_prefix %{SOURCE101}}
--
1.8.3.1