Translations of this page:

Check Nagios Performance

:!: Warning:
Sorry for dumping this information in bulk; I have not yet had the time to write it up properly.

Experienced Users will know how to use this stuff.

:!: Second Warning:
This Plugin won't work with Nagios 2.x and earlier.

The Plugin

#!/usr/bin/perl -w
#
# check_nagios_performance - nagios plugin
#
# Copyright (C) 2007,2008 Hendrik Baecker,
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. 
#
#
# Report bugs to:
#
# Last Updated: 07.02.2008 Version 0.5.0
#
# ChangeLog
# -------------------------------------------------------------------
# By:	   Michael Luebben
# Date:	   23.01.2008
# Version: 0.3.1
# Changes:
#	   - Change Output to HTML-Output
#	   - Add more information and metric in output
#	   - Fix bug in output for version
#
# By:      Michael Luebben
# Date:    29.01.2008
# Version: 0.4.0
# Changes:
#          - Fix bug in object-parameter
#	   - Add check for supported nagios version
#	   - Add option m for average/last in minutes
#	   - Move object lat to alat
#	   - Add object plat for passive host/service latency (average)
#
# By:      Michael Luebben
# Date:    07.02.2008
# Version: 0.5.0
# Changes:
#	   - Add object cobu check for command buffer usage
#	   - Add object ecol for last external commands
#	   - Delete switch command
#	   - Clean up and document source code
#
# NOTE:
# =======
# Performance monitoring for passive host/service checks (object plat)
# requires nagiostats 3.0rc2 or higher!
# Earlier versions have a bug.
 
# ------------------===== Used modules =====-----------------
use strict;
use warnings;
use Getopt::Long;

# path to utils.pm !!
use lib "/usr/local/nagios/libexec";
use utils qw ($TIMEOUT %ERRORS &print_revision &support);

# -------------------===== Set path =====--------------------
# path to your nagiostats binary
my $bin = "/usr/local/nagios/bin/nagiostats";

# path to your external command file
my $nagios_extcmd_file = "/usr/local/nagios/var/rw/nagios.cmd";

# ---------------===== Declare variables =====---------------
# Command line options (filled in by GetOptions in the main section).
my $opt_V;          # Version
my $opt_h;          # Help
my $opt_e;          # External command
my $opt_x;          # Hostname for external command
my $opt_y;          # Service description for external command
my $opt_w;          # WARNING treshold
my $opt_c;          # CRITICAL treshold
my $opt_o = "";     # Object
my $opt_t = "10";   # Timeout (sec); Default 10 Second
my $opt_m = "1";    # Average (min); Default 1 Minute

# Plugin release reported by -V; must match the "Last Updated" line and the
# newest ChangeLog entry in the file header.
my $version = "0.5.0";

# Raw and split output of "nagiostats -d NAGIOSVERSION".
my $getNagiosVersion;
my @nagiosVersion;

# Working state shared by the main section.
my $line;                   # unparsed rest of the nagiostats output
my $value;                  # value most recently returned by _parse
my %classService;           # per-key state used for the CSS class in the HTML output
my $state = "OK";           # plugin state, key into %ERRORS on exit
my $output = "OK - ";       # text output, extended by the selected object
my %data;                   # key => measured value
my %outputText;             # key => description printed before the value
my %outputMetric;           # key => unit text printed after the value
my %outputPerfdata;         # not referenced by the visible code
my $perfdata = "";          # performance data printed after the '|'
my $perfdataFlag = "0";     # "1" when an object built $perfdata itself
my $metricMin;              # "minute" or "minutes", matching $opt_m

# External command buffer figures (object cobu).
my $TotCmdBuf;
my $UsedCmdBuf;
my $HighCmdBuf;
# The check result buffer variables are not referenced by the visible code.
my $TotChkBuf;
my $UsedChkBuf;
my $HighChkBuf;

# Forward declarations; the definitions carry the same empty prototype.
sub print_help ();
sub print_usage ();

# Package variable (not a lexical) so that it stays visible as $main::PROGNAME.
our $PROGNAME = "check_nagios_performance";
 
# -------------------===== Functions =====-------------------
# _parse($string)
#
# Splits the leading value off a ';'-delimited nagiostats result string.
#
# The string is chomped, then an optional leading ';', one value made of
# digits, dots and minus signs, the following ';' and any whitespace after it
# are removed from the front.
#
# Returns ($rest, $value) on success.
# Returns an empty list (undef in scalar context) when
#   - the string does not start with "<value>;" (a DEBUG line is printed),
#   - the delimiter was found but no value preceded it, or
#   - nothing but a single ';' is left after the value.
sub _parse {
    my $string = shift;

    # Treat a missing argument like an empty string instead of warning on it.
    $string = "" unless defined $string;
    chomp $string;

    my $value;
    if ($string =~ s/^;?([\d\.-]+)?;\s*//) {
        # Save the capture right away; $1 is clobbered by the next match.
        $value = $1;
    }
    else {
        # Plain print: $string is data and must not be used as a printf format.
        print "DEBUG: No pattern match in function _parse($string)\n";
        return;
    }

    return unless defined $value && $value ne "" && $string ne ";";
    return ($string, $value);
}
 
# print_usage()
#
# Prints the command line synopsis and the description of every option to the
# currently selected output handle. Takes no arguments and returns nothing
# useful. Reads the package variable $PROGNAME for the program name.
#
# The empty prototype matches the forward declaration near the top of the file.
sub print_usage () {
    print <<"END_USAGE";
Usage:
  $PROGNAME -o <object>
  $PROGNAME -o <object> -e --host <host_name> --service <service_description>
  $PROGNAME [-h | --help]
  $PROGNAME [-V | --version]


Options:
  -m, --minutes <in minute (default: 1 minute)>:
     This options was only used for ahcl,ascl and ecol!!!
        1 for 1 minute average/last
        5 for 5 minute average/last
        15 for 15 minute average/last

  -o, --object <Object identifier> like:
     ahcl for Active Host Checks Last
     ascl for Active Service Checks Last
     exec for average Host/Service Checks Executiontime
     alat for average active Host/Service Checks latency
     plat for average passive Host/Service Checks latency
     cobu for external command buffer usage
     ecol for last external commands

  -e, --extcmd  External Command Mode
     Prints out an external command to your external_command_file
     Needed for this:
     --host <host_name>
     --service <service_description>

   Warning and critical treshold only used for object cobu!
  -w, --warning

  -c, --critical

  -t, --timeout <seconds (default: 10)>
     Abort with UNKNOWN when the plugin runs longer than this

  -h, --help
     Print detailed help screen

  -V, --version
     Print version information

END_USAGE
}
 
# print_help()
#
# Prints the copyright notice followed by the full usage text and a closing
# blank line. Takes no arguments.
sub print_help () {
    my $copyright_notice = "Copyright (c) 2007 Hendrik Baecker\n\n";

    print $copyright_notice;
    print_usage();
    print "\n";
}
 
# ---------------------===== Main =====----------------------
#
# ===== Parse the command line =====
#
# Short and long spellings share one variable each. 'bundling' makes single
# letter options combinable behind one dash.
Getopt::Long::Configure('bundling');
my $optionsOk = GetOptions(
    "V"   => \$opt_V, "version"     => \$opt_V,
    "h"   => \$opt_h, "help"        => \$opt_h,
    "o=s" => \$opt_o, "object=s"    => \$opt_o,
    "e"   => \$opt_e, "extcmd"      => \$opt_e,
    "x=s" => \$opt_x, "host=s"      => \$opt_x,
    "y=s" => \$opt_y, "service=s"   => \$opt_y,
    "t=i" => \$opt_t, "timeout=i"   => \$opt_t,
    "w=i" => \$opt_w, "warning=i"   => \$opt_w,
    "c=i" => \$opt_c, "critical=i"  => \$opt_c,
    "m=i" => \$opt_m, "minutes=i"   => \$opt_m,
);

# GetOptions has already reported the offending option on STDERR; do not go
# on with a half-understood command line.
if (!$optionsOk) {
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

if ($opt_t) {
    $TIMEOUT = $opt_t;
}

#
# ===== Just in case of problems, let's not hang Nagios =====
#
$SIG{'ALRM'} = sub {
    print "UNKNOWN - Plugin Timed out\n";
    exit $ERRORS{"UNKNOWN"};
};
alarm($TIMEOUT);

if ($opt_V) {
    print "Version " . $version . "\n";
    exit $ERRORS{'OK'};
}

if ($opt_h) {
    print_help();
    exit $ERRORS{'OK'};
}

#
# ===== Check supported Nagios Version =====
#
$getNagiosVersion = `$bin -m -D ";" -d NAGIOSVERSION`;

# No digits at all means nagiostats could not be run or printed nothing
# usable; report that instead of claiming an unsupported version.
if (!defined $getNagiosVersion || $getNagiosVersion !~ /\d/) {
    print "UNKNOWN - Could not read the Nagios version from $bin\n";
    exit $ERRORS{'UNKNOWN'};
}

@nagiosVersion = split(/\./, $getNagiosVersion);
my ($majorVersion) = defined $nagiosVersion[0]
    ? $nagiosVersion[0] =~ /^\s*(\d+)/
    : ();
if (!defined $majorVersion || $majorVersion != 3) {
    print "ERROR - Not supported Nagios Version\n";
    exit $ERRORS{'UNKNOWN'};
}

#
# ===== Check and set average time =====
#
# Only 1, 5 and 15 are accepted for -m; $metricMin holds the matching
# singular or plural word for the output texts.
if ($opt_m == 1) {
    $metricMin = "minute";
}
elsif ($opt_m == 5 || $opt_m == 15) {
    $metricMin = "minutes";
}
else {
    print "ERROR - you must set the right time for option m!\n\n";
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

if (!$opt_o) {
    print "No Object specified\n\n";
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

#
# =====check if set all options for check external command =====
#
# The external command needs BOTH the host name and the service description,
# so a single missing one is already an error.
if (defined $opt_e && (!$opt_x || !$opt_y)) {
    print "ERROR - using external command mode without a hostname/servicedesc won't work\n";
    exit $ERRORS{'CRITICAL'};
}

# Disabled threshold check, kept for reference. Note that it compares the
# object name with the numeric '==' operator, which is true for every
# non-numeric name, not just "cobu".
#if ($opt_o == "cobu" && !$opt_w && !$opt_c) {
#   print "ERROR - You must set warning and critical treshold\n";
#   exit $ERRORS{'CRITICAL'};
#}
 
#
# ===== Collect the values for the selected object =====
#
# Every object except cobu is described by one table entry:
#   headline - text appended to $output, followed by a newline
#   divisor  - optional; each value is divided by it before it is stored
#              (the exec/alat/plat values are divided by 1000 and labelled "sec")
#   fields   - list of [ data key, nagiostats variable, description, unit text ]
#
# The nagiostats variables are requested in the order of the field list and
# the returned values are assigned in that same order, so each key is tied to
# its variable explicitly instead of through the sort order of the keys.
my %statObjects = (
    # ----- Last active host checks -----
    ahcl => {
        headline => "Active host checks last $opt_m $metricMin",
        fields   => [
            [ "NUM_CACHED_ACT_HOSTCHECKS",   "NUMCACHEDHSTCHECKS${opt_m}M",
              "Number of cached host checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_ONDEMAND_ACT_HOSTCHECKS", "NUMOACTHSTCHECKS${opt_m}M",
              "Number of on-demand active host checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_PARALLEL_ACT_HOSTCHECKS", "NUMPARHSTCHECKS${opt_m}M",
              "Number of parallel host checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_SCHED_ACT_HOSTCHECKS",    "NUMSACTHSTCHECKS${opt_m}M",
              "Number of scheduled active host checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_SERIAL_ACT_HOSTCHECKS",   "NUMSERHSTCHECKS${opt_m}M",
              "Number of serial host checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_TOTAL_ACT_HOSTCHECKS",    "NUMACTHSTCHECKS${opt_m}M",
              "Number of total active host checks occuring in last $opt_m $metricMin: ", "" ],
        ],
    },
    # ----- Last active service checks -----
    ascl => {
        headline => "Active service checks last $opt_m $metricMin",
        fields   => [
            [ "NUM_CACHED_ACT_SERVICECHECKS",   "NUMCACHEDSVCCHECKS${opt_m}M",
              "Number of cached service checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_ONDEMAND_ACT_SERVICECHECKS", "NUMOACTSVCCHECKS${opt_m}M",
              "Number of on-demand active service checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_PASSIVE_ACT_SERVICECHECKS",  "NUMPSVSVCCHECKS${opt_m}M",
              "Number of passive service checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_SCHED_ACT_SERVICECHECKS",    "NUMSACTSVCCHECKS${opt_m}M",
              "Number of scheduled active service checks occuring in last $opt_m $metricMin: ", "" ],
            [ "NUM_TOTAL_ACT_SERVICECHECKS",    "NUMACTSVCCHECKS${opt_m}M",
              "Number of total active service checks occuring in last $opt_m $metricMin: ", "" ],
        ],
    },
    # ----- Average active host/service execution time -----
    exec => {
        headline => "Active host/service checks average executen time",
        divisor  => 1000,
        fields   => [
            [ "AVG_ACTHST_CHECK_EXECTIME", "AVGACTHSTEXT",
              "Active host check execution time: ", " sec (Average)" ],
            [ "AVG_ACTSVC_CHECK_EXECTIME", "AVGACTSVCEXT",
              "Active service check execution time: ", " sec (Average)" ],
        ],
    },
    # ----- Average active host/service check latency -----
    alat => {
        headline => "Active host/service checks average latency",
        divisor  => 1000,
        fields   => [
            [ "AVG_ACTHST_CHECK_LATENCY", "AVGACTHSTLAT",
              "Active host check latency: ", " sec (Average)" ],
            [ "AVG_ACTSVC_CHECK_LATENCY", "AVGACTSVCLAT",
              "Active service check latency: ", " sec (Average)" ],
        ],
    },
    # ----- Average passive host/service check latency -----
    # Host first, service second: requesting them the other way round while
    # assigning by sorted key would swap the two values.
    plat => {
        headline => "Passive host/service checks average latency",
        divisor  => 1000,
        fields   => [
            [ "AVG_PSVHST_CHECK_LATENCY", "AVGPSVHSTLAT",
              "Passive host check latency: ", " sec (Average)" ],
            [ "AVG_PSVSVC_CHECK_LATENCY", "AVGPSVSVCLAT",
              "Passive service check latency: ", " sec (Average)" ],
        ],
    },
    # ----- External commands last -----
    # $metricMin already carries the word "minute(s)".
    ecol => {
        headline => "External commands processed in last $opt_m $metricMin",
        fields   => [
            [ "NUM_EXTERNAL_COMMANDS", "NUMEXTCMDS${opt_m}M",
              "Number of external commands processed in $opt_m $metricMin: ", "" ],
        ],
    },
);

if ($opt_o eq "cobu") {
    #
    # ===== External command buffer usage =====
    #
    # Each figure is read with its own nagiostats call; anything that is not
    # a plain non-negative integer is treated as a failed call.
    my %slots;
    foreach my $variable (qw(TOTCMDBUF USEDCMDBUF HIGHCMDBUF)) {
        my $reading = `$bin -m -d $variable`;
        if (!defined $reading || $reading !~ /^\s*(\d+)\s*$/) {
            print "UNKNOWN - nagiostats returned no value for $variable\n";
            exit $ERRORS{'UNKNOWN'};
        }
        $slots{$variable} = $1;
    }
    $TotCmdBuf  = $slots{TOTCMDBUF};
    $UsedCmdBuf = $slots{USEDCMDBUF};
    $HighCmdBuf = $slots{HIGHCMDBUF};

    # A threshold that was not given must not be compared: undef counts as 0
    # in a numeric comparison and would make every result CRITICAL.
    $output .= "Used command buffer slots $UsedCmdBuf\n";
    if (defined $opt_c && $UsedCmdBuf >= $opt_c) {
        $output = "CRITICAL - Used external command buffer slots $UsedCmdBuf\n";
        $state  = "CRITICAL";
    }
    elsif (defined $opt_w && $UsedCmdBuf >= $opt_w) {
        $output = "WARNING - Used external command buffer slots $UsedCmdBuf\n";
        $state  = "WARNING";
    }

    %data = (
        USED_COMMAND_BUFFER_SLOTS  => $UsedCmdBuf,
        TOTAL_COMMAND_BUFFER_SLOTS => $TotCmdBuf,
        HIGH_COMMAND_BUFFER_SLOTS  => $HighCmdBuf,
    );
    %outputText = (
        USED_COMMAND_BUFFER_SLOTS  => "Number of external command buffer slots currently in use: ",
        TOTAL_COMMAND_BUFFER_SLOTS => "Total number of external command buffer slots available: ",
        HIGH_COMMAND_BUFFER_SLOTS  => "Highest number of external command buffer slots ever in use: ",
    );
    %outputMetric = (
        USED_COMMAND_BUFFER_SLOTS  => " slots",
        TOTAL_COMMAND_BUFFER_SLOTS => " slots",
        HIGH_COMMAND_BUFFER_SLOTS  => " slots",
    );
    # Only the "used" row reflects the plugin state in the HTML output.
    %classService = (
        USED_COMMAND_BUFFER_SLOTS => $state,
    );

    # --- Create perfdata ---
    # Threshold fields stay empty when the threshold was not given.
    my $warnField = defined $opt_w ? $opt_w : "";
    my $critField = defined $opt_c ? $opt_c : "";
    $perfdataFlag = "1";
    $perfdata  = "'currently used external command buffer'=" . $UsedCmdBuf . "slots;"
               . $warnField . ";" . $critField . ";0;" . $TotCmdBuf;
    $perfdata .= " 'max used external command buffer'=" . $HighCmdBuf . "slots;"
               . $warnField . ";" . $critField . ";0;" . $TotCmdBuf;
}
elsif (exists $statObjects{$opt_o}) {
    #
    # ===== Table driven objects (ahcl, ascl, exec, alat, plat, ecol) =====
    #
    my $object    = $statObjects{$opt_o};
    my @fields    = @{ $object->{fields} };
    my $variables = join(",", map { $_->[1] } @fields);

    $line    = `$bin -m -D ";" -d $variables`;
    $output .= $object->{headline} . "\n";

    %data         = ();
    %outputText   = ();
    %outputMetric = ();

    foreach my $field (@fields) {
        my ($key, $variable, $text, $unit) = @{$field};

        # _parse returns the unparsed rest and the value it split off.
        ($line, $value) = (defined $line && $line ne "") ? _parse($line) : ();

        # A missing value means nagiostats failed or answered with fewer
        # values than requested; never report that as OK.
        if (!defined $value) {
            print "UNKNOWN - nagiostats returned no value for $variable\n";
            exit $ERRORS{'UNKNOWN'};
        }

        $data{$key}         = $object->{divisor} ? $value / $object->{divisor} : $value;
        $outputText{$key}   = $text;
        $outputMetric{$key} = $unit;
    }
}
else {
    print "UNKNOWN - Don't know that option!\n";
    exit $ERRORS{'UNKNOWN'};
}
 
#
# ===== Create HTML output =====
#
# One table row per entry of %data, in sorted key order. The DIV class is
# "service" followed by the state recorded for the key in %classService
# ("OK" when none was recorded).
$output .= "<TABLE style='border-left-width:1px; border-left-style:dotted; border-right-width:0px;' >";
foreach my $key (sort(keys %data)) {
    $classService{$key} = "OK" unless $classService{$key};

    $output .= "<TR>"
             . "<TD nowrap><DIV CLASS='service" . $classService{$key} . "' style='font-size:7pt'>&nbsp;-&nbsp;</DIV></TD>"
             . "<TD>$outputText{$key}" . "$data{$key}" . "$outputMetric{$key}</TD>"
             . "</TR>";

    # --- Check if perfdata already set ---
    # $perfdataFlag is the string "0" (false) unless the selected object
    # built its own perfdata.
    if (!$perfdataFlag) {
        $perfdata .= "$key=$data{$key} ";
    }
}
$output .= "</TABLE>";

#
# ===== Print output =====
#
my $pluginResult = $output . "|" . $perfdata;

if (!$opt_e) {
    print $pluginResult . "\n";
}
else {
    # An external command is a single line, so real newlines in the result
    # are written as the two characters '\' 'n'.
    # NOTE(review): assumes Nagios unescapes "\n" in passive check results -
    # confirm for the Nagios version in use.
    (my $escapedResult = $pluginResult) =~ s/\n/\\n/g;

    my $now = time();

    # Opened for update ('+<') so that the file is never created or truncated.
    my $cmdHandle;
    if (!open($cmdHandle, '+<', $nagios_extcmd_file)) {
        print "UNKNOWN - Can't open '$nagios_extcmd_file' for update: $!\n";
        exit $ERRORS{'UNKNOWN'};
    }

    # Submit the real state code instead of a fixed 0, and terminate the
    # command with a newline.
    print {$cmdHandle} "[$now] PROCESS_SERVICE_CHECK_RESULT;$opt_x;$opt_y;$ERRORS{$state};$escapedResult\n";

    if (!close($cmdHandle)) {
        print "UNKNOWN - Can't write to '$nagios_extcmd_file': $!\n";
        exit $ERRORS{'UNKNOWN'};
    }
}

exit $ERRORS{$state};

The PNP Template

<?php

#
# Default Template used if no other template is found.
# Don`t delete this file !
# $Id: default.php 252 2007-05-15 09:50:14Z hendrikb $
#
# Reads $hostname, $servicedesc, $rrdfile, $DS and $NAME, which are not set in
# this file (presumably provided by PNP before the template is included -
# confirm), and fills $opt[1] and $def[1] with one graph showing every data
# source as a line plus its last / max / average value.
#
#
# Define some colors ..
#
define("_WARNRULE", '#FFFF00');
define("_CRITRULE", '#FF0000');
define("_AREA", '#00FF80');
define("_LINE", '#000000');

# Line colors; index 0 is a placeholder because data sources are numbered from 1.
$color=array("","FF0000","00FF00","0000FF","8000FF","000000","555555","00FF80","FF00FF");
$palette_size = count($color) - 1;
#
# Inital Logic ...
#
$opt[1] = '--vertical-label "" -l0  --title " ' . $hostname . '/' .  $servicedesc .'" ' ;
$count_elements=count($DS);
$def[1]="";
for ($mur=1; $mur <= $count_elements; $mur++) {
        # Wrap around after the last color so that a data source beyond the
        # palette still gets a valid color instead of an undefined index.
        $line_color = $color[(($mur - 1) % $palette_size) + 1];

        $def[1] .= "DEF:var$mur=$rrdfile:$DS[$mur]:AVERAGE ";
        $def[1] .= "LINE1:var$mur#$line_color:\"$NAME[$mur]\" ";
        $def[1] .= "GPRINT:var$mur:LAST:\"\t%6.2lf last\" " ;
        $def[1] .= "GPRINT:var$mur:MAX:\"\t%6.2lf max \" " ;
        $def[1] .= "GPRINT:var$mur:AVERAGE:\"\t%6.2lf avg \\n\" " ;

}

?>

Screenshots

Nagios Service output

PNP-Graph

projects/check_nagios_performance/start.txt · Last modified: 2008/02/24 15:39 by luebbenm
chimeric.de = chi`s home Creative Commons License Valid CSS Driven by DokuWiki do yourself a favour and use a real browser - get firefox!! Recent changes RSS feed Valid XHTML 1.0