Process 의 비정상적인 종료시 Core Dump 파일이 생기고, 시스템의 비정상적인 종료시 Crash Dump 파일이 생기죠?
하지만 대부분의 운영자 및 엔지니어가 비정상적인 종료에 대한 원인을 파악하기 위해 Dump 파일을 분석하려 하지만, 생각같이 쉽지만은 않다는 것을 느낄 겁니다.
Dump 파일이 Binary 파일이기 때문일 거라 생각이 듭니다.
지금 올려드리는 스크립트는 Crash 파일이 생성되었을 때 손쉽게 문제의 원인을 파악하고, 해당 시스템의 환경까지 전부 자동으로 분석해 주는 스크립트입니다.
**************************************************************************
**************************************************************************
사용방법은 File 내에 나와 있지만 간략히 설명드리면 다음과 같습니다.
1. 해당파일을 /var/crash/`hostname` 아래에 넣습니다.
2. 해당파일에 실행퍼미션을 주셔야죠! ^^;
# chmod 755 analysis.sh
3. 분석하고자 하는 Crash Dump 파일을 아규먼트로 입력합니다.
# analysis.sh unix.0 vmcore.0
자동으로 분석이 주~루~룩.............
분석내용을 파일로 받으실려면,
# /var/crash/`hostname`/analysis.sh unix.0 vmcore.0 >> analysis.txt
분석된파일을 보시고 Panic의 원인을 파악하시면 되겠죠..........
# vi /var/crash/`hostname`/analysis.sh (아래 문서를 복사해서 만드세요.....)
# chmod 744 /var/crash/`hostname`/analysis.sh
###########################################################################
#!/bin/sh
#
# Developed by Sun Microsystems, Inc.
# 2550 Garcia Avenue
# Mountain View, California 94043
#
# Copyright (c) 1996, Sun Microsystems, Inc.
#
# RESTRICTED RIGHTS: Use, duplication or disclosure by the government is
# subject to the restrictions as set forth in
# subparagraph (c) (1) (ii) of the Rights in Technical Data and Computer
# Software Clause as DFARS 252.227-7013 and FAR 52.227-19.
#
# SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
# THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
# SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS
# SOFTWARE OR ITS DERIVATIVES.
#
# By using or copying this Software, Licensee agrees to abide by the
# intellectual property laws, and all other applicable laws of the U.S.,
# and the terms of this license.
#
# ----------------------------------------------------------------------
#
# iscda -- Script to print out Initial System Crash Dump Analysis
#
# History:
#
# Edit 1 940707 krb First script for Solaris 2.3, Rev 1.0
# Edit 2 950220 krb Ported to Solaris 2.4
# Edit 3 950830 krb Time to start cleaning this up a bit
# Edit 4 960122 cjd Some 2.4 changes; put in more "conditional"
# execution of specific commands.
#
# Files this script creates:
#
# /tmp/iscda.{pid}
#
# Notes:
#
# This script must be run on the same kernel architecture and
# under the same OS as the system crash dump we are analyzing.
#
# ----------------------------------------------------------------------
# Require exactly two arguments: the kernel namelist and the core image.
# The usage text is written to stderr and the exit status is non-zero, so
# a bad invocation is not mistaken for success and the message does not
# end up inside a redirected report file.
if [ $# -ne 2 ]
then
  {
    echo
    echo "Usage: iscda unixfile corefile > your-output-file"
    echo
    echo "Examples: iscda unix.3 vmcore.3"
    echo " iscda /dev/ksyms /dev/mem"
    echo
  } >&2
  exit 1
fi
#
# Make sure we can see the files.
#
# Each argument must be a regular file, except that the live-system
# devices /dev/ksyms (namelist) and /dev/mem (core) are accepted as-is.
# The operands are quoted so that an empty or space-containing path is
# tested as a single word (unquoted, an empty argument made "[ ! -f ]"
# evaluate false and slip through).  Failures are reported on stderr and
# the script exits with status 1.
#
if [ ! -f "$1" ] && [ "$1" != "/dev/ksyms" ]
then
  echo >&2; echo "Unable to locate $1. Exiting." >&2
  echo >&2; exit 1
fi
if [ ! -f "$2" ] && [ "$2" != "/dev/mem" ]
then
  echo >&2; echo "Unable to locate $2. Exiting." >&2
  echo >&2; exit 1
fi
# /etc is appended to the command search path.
# NOTE(review): presumably so that tools installed under /etc are found
# -- confirm which command needs it.
PATH=${PATH}:/etc
export PATH
# Find the release we're dealing with.
# The output of adb's $<utsname macro is filtered for the "release" line
# and sed keeps only the text after the last space on that line.  The
# file arguments are quoted so a path with whitespace stays one word.
rel=`echo '$<utsname' | adb -k "$1" "$2" | grep release | sed 's/^.* //'`
# PROCOFFSET is substituted into the per-CPU adb macro generated later in
# this script (as <j+PROCOFFSET/s).  Patterns are tried in order, so 5.4
# is handled before the general 5.* arm.  Any other release is rejected
# on stderr with a non-zero exit status.
case "$rel" in
5.4)
  PROCOFFSET=268
  ;;
5.*)
  PROCOFFSET=260
  ;;
*)
  echo "Operating system release $rel is not supported by this script." >&2
  exit 1
  ;;
esac
#
# Ready to go: print a progress line, then the report banner made of the
# title/revision line and the current date between two rules.
#
echo "Working....."
echo "******************************************************************************"
echo "Initial System Crash Dump Analysis Output iscda Rev 1.4"
echo "`date`"
echo "******************************************************************************"
#
# We will be creating our own macros for use in adb sessions.
#
# The macro file lives inside a private directory named after the PID.
# mkdir fails if that name already exists in any form (file, directory
# or symbolic link), so the uniqueness check and the creation are one
# atomic step.  A separate "test -f, then write" sequence can be raced,
# and a dangling symbolic link planted at the predictable name would
# pass the -f test and then be written through.
# If the directory cannot be created, let the user do something about it.
#
ISCDA_TMPDIR=/tmp/iscda.$$
WHERE=$ISCDA_TMPDIR/macro
# The subshell keeps the restrictive umask from leaking into the rest of
# the script.
if (umask 077; mkdir "$ISCDA_TMPDIR") 2>/dev/null
then
  # Remove the scratch directory however the script ends; the signal
  # trap turns HUP/INT/TERM into a normal exit so the exit trap runs.
  trap 'rm -rf "$ISCDA_TMPDIR"' 0
  trap 'exit 1' 1 2 15
else
  echo >&2; echo "Unable to create $ISCDA_TMPDIR (it may already exist). Exiting." >&2
  echo >&2; exit 1
fi
#
# proc address is offset by a different number depending on the
# release. Computed above.
#
# Write the per-CPU adb macro to $WHERE.  The here-document delimiter is
# unquoted, so the shell substitutes ${PROCOFFSET} and $WHERE while
# "\$<" is written out as a literal "$<".
# For the structure whose address is in adb variable c, the macro saves
# the word at c+0t28 in n, prints the words labelled "Thread address"
# and "Proc address", prints a string at PROCOFFSET past the value loaded
# into j, and prints "This CPU was idle" when the words at c+8 and c+c
# are equal.  It then copies n into c and invokes $WHERE again, so the
# macro calls itself.  Variable e is not set here; the adb session that
# starts the macro has to set it.
# NOTE(review): c+0t28 is presumably the link to the next CPU and the
# string at PROCOFFSET the command name -- confirm against the kernel
# headers for the release.
# NOTE(review): the repeat count "#(#(<n))(#(<n-<e))" has two adjacent
# parenthesised terms with no operator between them; check it against a
# known-good copy of the script before relying on the loop termination.
#
cat > $WHERE <<EOC
*(<c+0t28)>n
<c+8/X"Thread address"
*(<c+8)>p
<p+a0/X"Proc address"
*(<p+a0)>j
<j+${PROCOFFSET}/s
.,#((*(<c+8))-(*(<c+c)))="This CPU was idle"
0,#(#(<n))(#(<n-<e))=n"Next CPU..."n
<n>c
<n,#(#(<n))(#(<n-<e))\$<$WHERE
EOC
#
# Get initial information from adb
#
# One adb session over the namelist/core pair prints, in order: utsname,
# the domain name, boot and crash times, audit and quota state, the panic
# string, a stack backtrace, per-CPU details (through the macro file
# named by $WHERE), a stack trace, the cpu structures and the message
# buffer.
# The here-document is unquoted: "\$" reaches adb as a literal "$" and
# $WHERE is replaced by the macro file's path.
# A ",count" prefix of #(x) runs a command once only when x is zero and
# ##(x) only when x is non-zero; this selects between the "enabled" and
# "not enabled" lines and guards the cpu_list walk against a NULL head.
# Boot time is computed as time minus lbolt divided by decimal 100.
# The file arguments are quoted so that a path containing whitespace is
# passed to adb as a single word.
#
cat <<EOC
************************************
** Initial information from adb **
************************************
EOC
adb -k "$1" "$2" <<EOA
\$<utsname
srpc_domain/s16t"Domain name"
lbolt>a
*time-(*<a%0t100)=Y16t"Time of boot"
time/Y16t"Time of crash"
,#(*audit_active)=n"Auditing is not enabled"
,##(*audit_active)=n"Auditing is enabled"
,#(*quotas_initialized)=n"Quotas are not enabled"
,##(*quotas_initialized)=n"Quotas are enabled"
=nn"** Panic String **"
="--------------------"
*panicstr/s
=nn"** Stack Backtrace **"
="-----------------------"
\$c
=nn"** Per CPU information **"
="---------------------------"
ncpus/X"# of CPUs present"
ncpus_online/X"# of CPUs online"
=nn
*cpu_list>c
<c>e
<c,#(<c)="The cpu_list pointer is NULL. Not a good sign."n
<c,#(#(<c))\$<$WHERE
=nn"** Stacktrace **"
="-----------------"
<sp\$<stacktrace
=nn
="** CPU structures **"
="--------------------"
\$<cpus
=nn
="** Msgbuf **"
="------------"
\$<msgbuf
EOA
#
# sun4m-only check of the enable_sm_wa kernel variable.
# The macro written to $WHERE is run by the adb session below with a
# repeat count of #(*(utsname+408)-6d000000), i.e. once when the word at
# utsname+408 equals 6d000000 and not at all otherwise.
# NOTE(review): that word is presumably the "m" of "sun4m" in the
# machine field of utsname -- confirm the offset.
# Both messages test the same variable and therefore name the same patch
# ID.  NOTE(review): 101406 is taken from the "is installed" message;
# confirm the ID against the Sun patch list.
# The file arguments and the redirection target are quoted so paths with
# whitespace stay single words.
#
cat > "$WHERE" <<EOC
=nn"** System is sun4m architecture - Checking enable_sm_wa **"
="------------------------------------------------------------"
cpunodes/s
enable_sm_wa/D
,##(*enable_sm_wa)="Patch 101406 is installed"
,#(*enable_sm_wa)="Patch 101406 is not installed"
EOC
adb -k "$1" "$2" <<EOA
0,#(*(utsname+408)-6d000000)\$<$WHERE
EOA
#
# Get process information by using crash.
#
# crash is given the core image with -d and the namelist with -n, and
# reads the single command "p -e" from the here-document.  The file
# arguments are quoted so a path containing whitespace reaches crash as
# one word.
#
cat <<EOC
**************************************
** Process information from crash **
**************************************
EOC
crash -d "$2" -n "$1" <<EOC
p -e
EOC
#
#
# Get strings output to capture message buffer.
#
# Only the first 200 lines that strings finds in the core image are
# printed, even though the heading says "complete".  The core file name
# is quoted so a path containing whitespace stays a single word.
#
cat <<EOC
******************************************************
** Strings output of complete message ring buffer **
******************************************************
EOC
strings "$2" | head -200
#
# Additional adb stuff
#
# Statistics are gathered per release (value of $rel found earlier):
#   5.4 - DNLC statistics from adb, kernel memory statistics from the
#         kmastat command of crash.
#   5.* - DNLC, kernel memory and streams statistics from one adb session.
# Patterns are tried in order, so 5.4 never reaches the 5.* arm; the
# final *) arm does nothing.
# NOTE(review): the comment in the 5.* arm says "5.3 and earlier", but
# the pattern also matches any later 5.x release -- confirm the intent.
# The "Hit rate percentage" is the first ncstats word times decimal 100,
# divided (adb '%' operator) by the sum of the words at ncstats,
# ncstats+4 and ncstats+14.
# File arguments are quoted so paths with whitespace stay single words.
#
cat <<EOC
***********************
** Some Statistics **
***********************
EOC
case "$rel" in
5.4)
#
## Get DNLC out of adb, kma out of crash.
## Streams stats are not available.
#
adb -k "$1" "$2" <<EOA
=nn"** Directory Name Lookup Cache Statistics **"
="----------------------------------------------"
ncsize/D"Directory name cache size"
ncstats/D"# of cache hits that we used"
+/D"# of misses"
+/D"# of enters done"
+/D"# of enters tried when already cached"
+/D"# of long names tried to enter"
+/D"# of long name tried to look up"
+/D"# of times LRU list was empty"
+/D"# of purges of cache"
*ncstats+*(ncstats+4)+*(ncstats+14)>n
*ncstats*0t100%<n=D"Hit rate percentage"
="(See /usr/include/sys/dnlc.h for more information)"
EOA
cat <<EOC
** Kernel Memory Request Statistics **
----------------------------------------
EOC
crash -d "$2" -n "$1" <<EOC
kmastat
EOC
;;
5.*)
#
## For 5.3 and earlier, DNLC stats, streams stats,
## and kma stats are available through adb.
#
adb -k "$1" "$2" <<EOA
=nn"** Directory Name Lookup Cache Statistics **"
="----------------------------------------------"
ncsize/D"Directory name cache size"
ncstats/D"# of cache hits that we used"
+/D"# of misses"
+/D"# of enters done"
+/D"# of enters tried when already cached"
+/D"# of long names tried to enter"
+/D"# of long name tried to look up"
+/D"# of times LRU list was empty"
+/D"# of purges of cache"
*ncstats+*(ncstats+4)+*(ncstats+14)>n
*ncstats*0t100%<n=D"Hit rate percentage"
="(See /usr/include/sys/dnlc.h for more information)"
=nn"** Kernel Memory Request Statistics **"
="----------------------------------------"
="Small"16t"Large"16t"Outsized"
kmeminfo/3X"Owned by kmem"
+/3X"Mem allocated"
+/3X"# of failures"n
pagesize/D"Memory page size"
="(See /usr/include/sys/sysinfo.h for more information)"
=nn"** Streams Statistics **"
="--------------------------"
="In use"16t"Total"16t"Maximum"16t"Failures"
strst/4X"Streams"
+/4X"Queues"
+/4X"MsgBlks"
+/4X"LinkBlks"
="(See /usr/include/sys/strstat.h for more information)"
EOA
;;
*)
;;
esac
#
# Print out some of the tunable variables. Do this via macros
# so that we don't get alot of symbol not found messages if
# the driver was not modloaded for use.
#
# Three groups are printed the same way (shared memory, semaphores,
# message queues): the adb commands for the group are written to the
# macro file named by $WHERE, overwriting the previous contents, and a
# separate adb session then runs that file with "$<".  Within a group the
# first line names a symbol and each following "+" line prints the next
# location after it.
# The redirection target and the file arguments are quoted so paths with
# whitespace stay single words.
#
cat > "$WHERE" <<EOC
=nn"** Shared Memory Tuning Variables (if in use) **"
="--------------------------------------------------"
shminfo_shmmax/D"Max segment size"
+/D"Min segment size"
+/D"Max identifiers"
+/D"Max attached shm segs per proc"
EOC
adb -k "$1" "$2" <<EOA
\$<$WHERE
EOA
cat > "$WHERE" <<EOC
=nn"** Semaphore Tuning Variables (if in use) **"
="----------------------------------------------"
seminfo_semmap/D"Entries per map"
+/D"Max identifiers"
+/D"Max in system"
+/D"Max undos"
+/D"Max sems per id"
+/D"Max ops per semop"
+/D"Max undos per proc"
+/D"Max bytes in undos"
+/D"Max sem value"
+/D"Max adjust on exit"
EOC
adb -k "$1" "$2" <<EOA
\$<$WHERE
EOA
cat > "$WHERE" <<EOC
=nn"** Message Queue Tuning Variables (if in use) **"
="--------------------------------------------------"
msginfo_msgmap/D"Max entries in map"
+/D"Max message size"
+/D"Max bytes on queue"
+/D"Max msg queue ids"
+/D"Max segment size (word size multiple)"
+/D"Max system message headers"
+/d16t"Max msg segments (must be < 32768)"
EOC
adb -k "$1" "$2" <<EOA
\$<$WHERE
EOA
##
## optional - eats up lots of space
##
#
# The disabled section below would read the panic string through adb,
# keep only its last word (sed strips everything up to the final space)
# and, when that word is "zero", run adb's $<threadlist macro.  It is
# left commented out because of the amount of output it produces.
#
#############################################
#panicstring=`echo "*panicstr/s" | adb -k $1 $2 | sed -e '1d' -e 's/^.* //' | head -1`
#if [ "$panicstring" = "zero" ] ; then
#adb -k $1 $2 << EOA
#\$<threadlist
#EOA
#fi
#
# All adb sessions that use the macro file are finished; remove it.
rm $WHERE
#
## Finish the report with configuration data from the machine the
## script is running on: the installed patch list (showrev -p) and
## the hardware configuration (prtconf -vp), then a closing marker.
#
echo "************************************"
echo "** Current patch revision status **"
echo "************************************"
showrev -p
echo "****************************************"
echo "** Hardware Configuration Information **"
echo "****************************************"
prtconf -vp
cat <<EOC
*****
Done!
EOC
############################################
#
# +---------------------------------------------------------------------+
# | For more information about system crash dump analysis, refer to |
# | the SunSoft Press book, "Panic! UNIX System Crash Dump Analysis", |
# | ISBN 0-13-149386-8, published by Prentice Hall. |
# +---------------------------------------------------------------------+
#
############################################
#
# end of iscda
#
출처 : http://www.wowunix.com
하지만 대부분의 운영자 및 엔지니어가 비정상적인 종료에 대한 원인을 파악하기 위해 Dump 파일을 분석하려 하지만, 생각같이 쉽지만은 않다는 것을 느낄 겁니다.
Dump 파일이 Binary 파일이기 때문일 거라 생각이 듭니다.
지금 올려드리는 스크립트는 Crash 파일이 생성되었을 때 손쉽게 문제의 원인을 파악하고, 해당 시스템의 환경까지 전부 자동으로 분석해 주는 스크립트입니다.
**************************************************************************
**************************************************************************
사용방법은 File 내에 나와 있지만 간략히 설명드리면 다음과 같습니다.
1. 해당파일을 /var/crash/`hostname` 아래에 넣습니다.
2. 해당파일에 실행퍼미션을 주셔야죠! ^^;
# chmod 755 analysis.sh
3. 분석하고자 하는 Crash Dump 파일을 아규먼트로 입력합니다.
# analysis.sh unix.0 vmcore.0
자동으로 분석이 주~루~룩.............
분석내용을 파일로 받으실려면,
# /var/crash/`hostname`/analysis.sh unix.0 vmcore.0 >> analysis.txt
분석된파일을 보시고 Panic의 원인을 파악하시면 되겠죠..........
# vi /var/crash/`hostname`/analysis.sh (아래 문서를 복사해서 만드세요.....)
# chmod 744 /var/crash/`hostname`/analysis.sh
###########################################################################
#!/bin/sh
#
# Developed by Sun Microsystems, Inc.
# 2550 Garcia Avenue
# Mountain View, California 94043
#
# Copyright (c) 1996, Sun Microsystems, Inc.
#
# RESTRICTED RIGHTS: Use, duplication or disclosure by the government is
# subject to the restrictions as set forth in
# subparagraph (c) (1) (ii) of the Rights in Technical Data and Computer
# Software Clause as DFARS 252.227-7013 and FAR 52.227-19.
#
# SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
# THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
# SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS
# SOFTWARE OR ITS DERIVATIVES.
#
# By using or copying this Software, Licensee agrees to abide by the
# intellectual property laws, and all other applicable laws of the U.S.,
# and the terms of this license.
#
# ----------------------------------------------------------------------
#
# iscda -- Script to print out Initial System Crash Dump Analysis
#
# History:
#
# Edit 1 940707 krb First script for Solaris 2.3, Rev 1.0
# Edit 2 950220 krb Ported to Solaris 2.4
# Edit 3 950830 krb Time to start cleaning this up a bit
# Edit 4 960122 cjd Some 2.4 changes; put in more "conditional"
# execution of specific commands.
#
# Files this script creates:
#
# /tmp/iscda.{pid}
#
# Notes:
#
# This script must be run on the same kernel architecture and
# under the same OS as the system crash dump we are analyzing.
#
# ----------------------------------------------------------------------
# Require exactly two arguments: the kernel namelist and the core image.
# The usage text is written to stderr and the exit status is non-zero, so
# a bad invocation is not mistaken for success and the message does not
# end up inside a redirected report file.
if [ $# -ne 2 ]
then
  {
    echo
    echo "Usage: iscda unixfile corefile > your-output-file"
    echo
    echo "Examples: iscda unix.3 vmcore.3"
    echo " iscda /dev/ksyms /dev/mem"
    echo
  } >&2
  exit 1
fi
#
# Make sure we can see the files.
#
# Each argument must be a regular file, except that the live-system
# devices /dev/ksyms (namelist) and /dev/mem (core) are accepted as-is.
# The operands are quoted so that an empty or space-containing path is
# tested as a single word (unquoted, an empty argument made "[ ! -f ]"
# evaluate false and slip through).  Failures are reported on stderr and
# the script exits with status 1.
#
if [ ! -f "$1" ] && [ "$1" != "/dev/ksyms" ]
then
  echo >&2; echo "Unable to locate $1. Exiting." >&2
  echo >&2; exit 1
fi
if [ ! -f "$2" ] && [ "$2" != "/dev/mem" ]
then
  echo >&2; echo "Unable to locate $2. Exiting." >&2
  echo >&2; exit 1
fi
# /etc is appended to the command search path.
# NOTE(review): presumably so that tools installed under /etc are found
# -- confirm which command needs it.
PATH=${PATH}:/etc
export PATH
# Find the release we're dealing with.
# The output of adb's $<utsname macro is filtered for the "release" line
# and sed keeps only the text after the last space on that line.  The
# file arguments are quoted so a path with whitespace stays one word.
rel=`echo '$<utsname' | adb -k "$1" "$2" | grep release | sed 's/^.* //'`
# PROCOFFSET is substituted into the per-CPU adb macro generated later in
# this script (as <j+PROCOFFSET/s).  Patterns are tried in order, so 5.4
# is handled before the general 5.* arm.  Any other release is rejected
# on stderr with a non-zero exit status.
case "$rel" in
5.4)
  PROCOFFSET=268
  ;;
5.*)
  PROCOFFSET=260
  ;;
*)
  echo "Operating system release $rel is not supported by this script." >&2
  exit 1
  ;;
esac
#
# Ready to go: print a progress line, then the report banner made of the
# title/revision line and the current date between two rules.
#
echo "Working....."
echo "******************************************************************************"
echo "Initial System Crash Dump Analysis Output iscda Rev 1.4"
echo "`date`"
echo "******************************************************************************"
#
# We will be creating our own macros for use in adb sessions.
#
# The macro file lives inside a private directory named after the PID.
# mkdir fails if that name already exists in any form (file, directory
# or symbolic link), so the uniqueness check and the creation are one
# atomic step.  A separate "test -f, then write" sequence can be raced,
# and a dangling symbolic link planted at the predictable name would
# pass the -f test and then be written through.
# If the directory cannot be created, let the user do something about it.
#
ISCDA_TMPDIR=/tmp/iscda.$$
WHERE=$ISCDA_TMPDIR/macro
# The subshell keeps the restrictive umask from leaking into the rest of
# the script.
if (umask 077; mkdir "$ISCDA_TMPDIR") 2>/dev/null
then
  # Remove the scratch directory however the script ends; the signal
  # trap turns HUP/INT/TERM into a normal exit so the exit trap runs.
  trap 'rm -rf "$ISCDA_TMPDIR"' 0
  trap 'exit 1' 1 2 15
else
  echo >&2; echo "Unable to create $ISCDA_TMPDIR (it may already exist). Exiting." >&2
  echo >&2; exit 1
fi
#
# proc address is offset by a different number depending on the
# release. Computed above.
#
# Write the per-CPU adb macro to $WHERE.  The here-document delimiter is
# unquoted, so the shell substitutes ${PROCOFFSET} and $WHERE while
# "\$<" is written out as a literal "$<".
# For the structure whose address is in adb variable c, the macro saves
# the word at c+0t28 in n, prints the words labelled "Thread address"
# and "Proc address", prints a string at PROCOFFSET past the value loaded
# into j, and prints "This CPU was idle" when the words at c+8 and c+c
# are equal.  It then copies n into c and invokes $WHERE again, so the
# macro calls itself.  Variable e is not set here; the adb session that
# starts the macro has to set it.
# NOTE(review): c+0t28 is presumably the link to the next CPU and the
# string at PROCOFFSET the command name -- confirm against the kernel
# headers for the release.
# NOTE(review): the repeat count "#(#(<n))(#(<n-<e))" has two adjacent
# parenthesised terms with no operator between them; check it against a
# known-good copy of the script before relying on the loop termination.
#
cat > $WHERE <<EOC
*(<c+0t28)>n
<c+8/X"Thread address"
*(<c+8)>p
<p+a0/X"Proc address"
*(<p+a0)>j
<j+${PROCOFFSET}/s
.,#((*(<c+8))-(*(<c+c)))="This CPU was idle"
0,#(#(<n))(#(<n-<e))=n"Next CPU..."n
<n>c
<n,#(#(<n))(#(<n-<e))\$<$WHERE
EOC
#
# Get initial information from adb
#
# One adb session over the namelist/core pair prints, in order: utsname,
# the domain name, boot and crash times, audit and quota state, the panic
# string, a stack backtrace, per-CPU details (through the macro file
# named by $WHERE), a stack trace, the cpu structures and the message
# buffer.
# The here-document is unquoted: "\$" reaches adb as a literal "$" and
# $WHERE is replaced by the macro file's path.
# A ",count" prefix of #(x) runs a command once only when x is zero and
# ##(x) only when x is non-zero; this selects between the "enabled" and
# "not enabled" lines and guards the cpu_list walk against a NULL head.
# Boot time is computed as time minus lbolt divided by decimal 100.
# The file arguments are quoted so that a path containing whitespace is
# passed to adb as a single word.
#
cat <<EOC
************************************
** Initial information from adb **
************************************
EOC
adb -k "$1" "$2" <<EOA
\$<utsname
srpc_domain/s16t"Domain name"
lbolt>a
*time-(*<a%0t100)=Y16t"Time of boot"
time/Y16t"Time of crash"
,#(*audit_active)=n"Auditing is not enabled"
,##(*audit_active)=n"Auditing is enabled"
,#(*quotas_initialized)=n"Quotas are not enabled"
,##(*quotas_initialized)=n"Quotas are enabled"
=nn"** Panic String **"
="--------------------"
*panicstr/s
=nn"** Stack Backtrace **"
="-----------------------"
\$c
=nn"** Per CPU information **"
="---------------------------"
ncpus/X"# of CPUs present"
ncpus_online/X"# of CPUs online"
=nn
*cpu_list>c
<c>e
<c,#(<c)="The cpu_list pointer is NULL. Not a good sign."n
<c,#(#(<c))\$<$WHERE
=nn"** Stacktrace **"
="-----------------"
<sp\$<stacktrace
=nn
="** CPU structures **"
="--------------------"
\$<cpus
=nn
="** Msgbuf **"
="------------"
\$<msgbuf
EOA
#
# sun4m-only check of the enable_sm_wa kernel variable.
# The macro written to $WHERE is run by the adb session below with a
# repeat count of #(*(utsname+408)-6d000000), i.e. once when the word at
# utsname+408 equals 6d000000 and not at all otherwise.
# NOTE(review): that word is presumably the "m" of "sun4m" in the
# machine field of utsname -- confirm the offset.
# Both messages test the same variable and therefore name the same patch
# ID.  NOTE(review): 101406 is taken from the "is installed" message;
# confirm the ID against the Sun patch list.
# The file arguments and the redirection target are quoted so paths with
# whitespace stay single words.
#
cat > "$WHERE" <<EOC
=nn"** System is sun4m architecture - Checking enable_sm_wa **"
="------------------------------------------------------------"
cpunodes/s
enable_sm_wa/D
,##(*enable_sm_wa)="Patch 101406 is installed"
,#(*enable_sm_wa)="Patch 101406 is not installed"
EOC
adb -k "$1" "$2" <<EOA
0,#(*(utsname+408)-6d000000)\$<$WHERE
EOA
#
# Get process information by using crash.
#
# crash is given the core image with -d and the namelist with -n, and
# reads the single command "p -e" from the here-document.  The file
# arguments are quoted so a path containing whitespace reaches crash as
# one word.
#
cat <<EOC
**************************************
** Process information from crash **
**************************************
EOC
crash -d "$2" -n "$1" <<EOC
p -e
EOC
#
#
# Get strings output to capture message buffer.
#
# Only the first 200 lines that strings finds in the core image are
# printed, even though the heading says "complete".  The core file name
# is quoted so a path containing whitespace stays a single word.
#
cat <<EOC
******************************************************
** Strings output of complete message ring buffer **
******************************************************
EOC
strings "$2" | head -200
#
# Additional adb stuff
#
# Statistics are gathered per release (value of $rel found earlier):
#   5.4 - DNLC statistics from adb, kernel memory statistics from the
#         kmastat command of crash.
#   5.* - DNLC, kernel memory and streams statistics from one adb session.
# Patterns are tried in order, so 5.4 never reaches the 5.* arm; the
# final *) arm does nothing.
# NOTE(review): the comment in the 5.* arm says "5.3 and earlier", but
# the pattern also matches any later 5.x release -- confirm the intent.
# The "Hit rate percentage" is the first ncstats word times decimal 100,
# divided (adb '%' operator) by the sum of the words at ncstats,
# ncstats+4 and ncstats+14.
# File arguments are quoted so paths with whitespace stay single words.
#
cat <<EOC
***********************
** Some Statistics **
***********************
EOC
case "$rel" in
5.4)
#
## Get DNLC out of adb, kma out of crash.
## Streams stats are not available.
#
adb -k "$1" "$2" <<EOA
=nn"** Directory Name Lookup Cache Statistics **"
="----------------------------------------------"
ncsize/D"Directory name cache size"
ncstats/D"# of cache hits that we used"
+/D"# of misses"
+/D"# of enters done"
+/D"# of enters tried when already cached"
+/D"# of long names tried to enter"
+/D"# of long name tried to look up"
+/D"# of times LRU list was empty"
+/D"# of purges of cache"
*ncstats+*(ncstats+4)+*(ncstats+14)>n
*ncstats*0t100%<n=D"Hit rate percentage"
="(See /usr/include/sys/dnlc.h for more information)"
EOA
cat <<EOC
** Kernel Memory Request Statistics **
----------------------------------------
EOC
crash -d "$2" -n "$1" <<EOC
kmastat
EOC
;;
5.*)
#
## For 5.3 and earlier, DNLC stats, streams stats,
## and kma stats are available through adb.
#
adb -k "$1" "$2" <<EOA
=nn"** Directory Name Lookup Cache Statistics **"
="----------------------------------------------"
ncsize/D"Directory name cache size"
ncstats/D"# of cache hits that we used"
+/D"# of misses"
+/D"# of enters done"
+/D"# of enters tried when already cached"
+/D"# of long names tried to enter"
+/D"# of long name tried to look up"
+/D"# of times LRU list was empty"
+/D"# of purges of cache"
*ncstats+*(ncstats+4)+*(ncstats+14)>n
*ncstats*0t100%<n=D"Hit rate percentage"
="(See /usr/include/sys/dnlc.h for more information)"
=nn"** Kernel Memory Request Statistics **"
="----------------------------------------"
="Small"16t"Large"16t"Outsized"
kmeminfo/3X"Owned by kmem"
+/3X"Mem allocated"
+/3X"# of failures"n
pagesize/D"Memory page size"
="(See /usr/include/sys/sysinfo.h for more information)"
=nn"** Streams Statistics **"
="--------------------------"
="In use"16t"Total"16t"Maximum"16t"Failures"
strst/4X"Streams"
+/4X"Queues"
+/4X"MsgBlks"
+/4X"LinkBlks"
="(See /usr/include/sys/strstat.h for more information)"
EOA
;;
*)
;;
esac
#
# Print out some of the tunable variables. Do this via macros
# so that we don't get alot of symbol not found messages if
# the driver was not modloaded for use.
#
# Three groups are printed the same way (shared memory, semaphores,
# message queues): the adb commands for the group are written to the
# macro file named by $WHERE, overwriting the previous contents, and a
# separate adb session then runs that file with "$<".  Within a group the
# first line names a symbol and each following "+" line prints the next
# location after it.
# The redirection target and the file arguments are quoted so paths with
# whitespace stay single words.
#
cat > "$WHERE" <<EOC
=nn"** Shared Memory Tuning Variables (if in use) **"
="--------------------------------------------------"
shminfo_shmmax/D"Max segment size"
+/D"Min segment size"
+/D"Max identifiers"
+/D"Max attached shm segs per proc"
EOC
adb -k "$1" "$2" <<EOA
\$<$WHERE
EOA
cat > "$WHERE" <<EOC
=nn"** Semaphore Tuning Variables (if in use) **"
="----------------------------------------------"
seminfo_semmap/D"Entries per map"
+/D"Max identifiers"
+/D"Max in system"
+/D"Max undos"
+/D"Max sems per id"
+/D"Max ops per semop"
+/D"Max undos per proc"
+/D"Max bytes in undos"
+/D"Max sem value"
+/D"Max adjust on exit"
EOC
adb -k "$1" "$2" <<EOA
\$<$WHERE
EOA
cat > "$WHERE" <<EOC
=nn"** Message Queue Tuning Variables (if in use) **"
="--------------------------------------------------"
msginfo_msgmap/D"Max entries in map"
+/D"Max message size"
+/D"Max bytes on queue"
+/D"Max msg queue ids"
+/D"Max segment size (word size multiple)"
+/D"Max system message headers"
+/d16t"Max msg segments (must be < 32768)"
EOC
adb -k "$1" "$2" <<EOA
\$<$WHERE
EOA
##
## optional - eats up lots of space
##
#
# The disabled section below would read the panic string through adb,
# keep only its last word (sed strips everything up to the final space)
# and, when that word is "zero", run adb's $<threadlist macro.  It is
# left commented out because of the amount of output it produces.
#
#############################################
#panicstring=`echo "*panicstr/s" | adb -k $1 $2 | sed -e '1d' -e 's/^.* //' | head -1`
#if [ "$panicstring" = "zero" ] ; then
#adb -k $1 $2 << EOA
#\$<threadlist
#EOA
#fi
#
# All adb sessions that use the macro file are finished; remove it.
rm $WHERE
#
## Finish the report with configuration data from the machine the
## script is running on: the installed patch list (showrev -p) and
## the hardware configuration (prtconf -vp), then a closing marker.
#
echo "************************************"
echo "** Current patch revision status **"
echo "************************************"
showrev -p
echo "****************************************"
echo "** Hardware Configuration Information **"
echo "****************************************"
prtconf -vp
cat <<EOC
*****
Done!
EOC
############################################
#
# +---------------------------------------------------------------------+
# | For more information about system crash dump analysis, refer to |
# | the SunSoft Press book, "Panic! UNIX System Crash Dump Analysis", |
# | ISBN 0-13-149386-8, published by Prentice Hall. |
# +---------------------------------------------------------------------+
#
############################################
#
# end of iscda
#
출처 : http://www.wowunix.com