9. pnp4nagios

最後更新: 2018-02-01

介紹

 

利用 Nagios plugin (例如 check_nrpe) 回傳的 performance data 去畫 graph

HomePage: http://docs.pnp4nagios.org/

modes to process performance data

 - Synchronous
 - Bulk
 - Bulk Mode with NPCD
 - Gearman

Synchronous:

Nagios will call the perl script process_perfdata.pl for every service and host, respectively, to process the data.

Bulk:

Nagios writes the necessary data to a temporary file. After expiration of a defined time the file will be processed in one piece and deleted afterwards.

 


Install

 

yum install rrdtool perl-Time-HiRes rrdtool-perl php-gd

yum install nagios-plugins-all nagios-plugins-nrpe

yum install pnp4nagios

service httpd restart


pnp4nagios apache Config

 

/etc/httpd/conf.d/pnp4nagios.conf

Alias /pnp4nagios "/usr/share/nagios/html/pnp4nagios"
AuthUserFile /etc/nagios/passwd
Require valid-user

/etc/logrotate.d/pnp4nagios

/var/log/pnp4nagios/*.log {
    compress
    missingok
    notifempty
    rotate 5
    size 100k
}

/etc/pnp4nagios/nagios.cfg

/etc/pnp4nagios/config.php

/etc/pnp4nagios/check_commands/check_nrpe.cfg

 



修改設定 - Synchronous Mode

 

# The synchronous mode is the simplest way to integrate the data collector process_perfdata.pl into nagios

 * 在 nagios 3 才用到, nagios 4 唔用得此 mode

nagios.cfg

process_performance_data=1
enable_environment_macros=1
host_perfdata_command=process-host-perfdata
service_perfdata_command=process-service-perfdata

# process_performance_data=1
# Default is “0”
# host performance data will be processed using the host_perfdata_command and
# service performance data will be processed using the service_perfdata_command

# enable_environment_macros=1
# Nagios will make all standard macros available as environment variables

commands.cfg

#### pnp4nagios Synchronous mode
define command {
       command_name    process-service-perfdata
       command_line    /usr/bin/perl /usr/libexec/pnp4nagios/process_perfdata.pl
}

define command {
       command_name    process-host-perfdata
       command_line    /usr/bin/perl /usr/libexec/pnp4nagios/process_perfdata.pl -d HOSTPERFDATA
}

service httpd restart

service nagios restart

 


Verify pnp4nagios configuration

 

wget http://verify.pnp4nagios.org/verify_pnp_config

perl verify_pnp_config --mode sync \
 --config=/etc/nagios/nagios.cfg --pnpcfg=/etc/pnp4nagios

LOG

[1516779800] wproc: HOST PERFDATA job 0 from worker Core Worker 2203 is a non-check helper but exited with return code 2
[1516779800] wproc:   early_timeout=0; exited_ok=1; wait_status=512; error_code=0;
[1516779806] wproc: SERVICE PERFDATA job 0 from worker Core Worker 2201 is a non-check helper but exited with return code 2
[1516779806] wproc:   early_timeout=0; exited_ok=1; wait_status=512; error_code=0;

 * Turns out that Nagios 4.x doesn't work properly with pnp4nagios in Synchronous Mode. Use Bulk Mode with NPCD instead.

 


修改設定 - Bulk+npcd

 

好處

data collector process_perfdata.pl is not invoked for every service/host check.

nagios.cfg

## Enable processing of performance data
process_performance_data=1

## host_perfdata_command & service_perfdata_command are not allowed in mode 'bulk+npcd'
#host_perfdata_command=process-host-perfdata
#service_perfdata_command=process-service-perfdata

## Checking
# grep -e process_performance_data -e perfdata_command /etc/nagios/nagios.cfg

###################### bulk mode
host_perfdata_file=/var/log/pnp4nagios/host-perfdata
# format of the temporary file. Data will be defined using Nagios macros.
host_perfdata_file_template=DATATYPE::HOSTPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tHOSTPERFDATA::$HOSTPERFDATA$\tHOSTCHECKCOMMAND::$HOSTCHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$
host_perfdata_file_mode=a
host_perfdata_file_processing_interval=15
host_perfdata_file_processing_command=process-host-perfdata-file

service_perfdata_file=/var/log/pnp4nagios/service-perfdata
service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$
service_perfdata_file_mode=a
service_perfdata_file_processing_interval=15
service_perfdata_file_processing_command=process-service-perfdata-file

## Checking
# grep "_perfdata_file_processing_command" /etc/nagios/nagios.cfg

commands.cfg

###################### check_nrpe
define command{
        command_name    nrpe_cpu
        command_line    $USER1$/check_nrpe -n -H $HOSTADDRESS$ -c alias_cpu
}
define command{
        command_name    nrpe_mem
        command_line    $USER1$/check_nrpe -n -H $HOSTADDRESS$ -c check_memory
}
define command{
        command_name    nrpe_disk
        command_line    $USER1$/check_nrpe -n -H $HOSTADDRESS$ -c alias_disk
}
define command{
        command_name    nrpe_uptime
        command_line    $USER1$/check_nrpe -n -H $HOSTADDRESS$ -c alias_uptime
}

###################### pnp4nagios
# 與 Default 的不同
define command{
        command_name    process-host-perfdata
        command_line    /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /var/log/nagios/host-perfdata.out
}
define command{
        command_name    process-service-perfdata
        command_line    /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /var/log/nagios/service-perfdata.out
}

# mv file 去比 npcd 食
define command{
       command_name    process-host-perfdata-file
       command_line    /bin/mv /var/log/pnp4nagios/host-perfdata /var/spool/pnp4nagios/host-perfdata.$TIMET$
}
define command{
       command_name    process-service-perfdata-file
       command_line    /bin/mv /var/log/pnp4nagios/service-perfdata /var/spool/pnp4nagios/service-perfdata.$TIMET$
}

## Checking
# 在 npcd 未執行前, 那裡會愈來愈多 file
# watch -n 1 'ls /var/spool/pnp4nagios/'

npcd.cfg

max_logfile_size = 10485760

perfdata_spool_dir = /var/spool/pnp4nagios
perfdata_file_run_cmd = /usr/libexec/pnp4nagios/process_perfdata.pl
perfdata_file_run_cmd_args = --bulk

# how many seconds npcd should wait between dirscans
sleep_time = 15

perfdata_spool_filename = perfdata

perfdata_file_processing_interval = 15

Test

perl verify_pnp_config --mode "bulk+npcd" \
 --config=/etc/nagios/nagios.cfg --pnpcfg=/etc/pnp4nagios

Start npcd

# -f | --config
# -d | --daemon

/usr/sbin/npcd -d -f /etc/pnp4nagios/npcd.cfg
 


Integrated with Nagios

 

templates.cfg

# pnp4nagios setting
define host {
   name       host-pnp
   action_url /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=_HOST_
   register   0
}
 
define service {
   name       service-pnp
   action_url /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=$SERVICEDESC$
   register   0
}

windows.cfg

define host{
        use             windows-server,host-pnp
        host_name       DB01
        alias           3.71
        address         192.168.2.80
        hostgroups      windows-servers
        }
    
define service{
        use                 generic-service,service-pnp
        hostgroup_name      windows-servers
        service_description CPU Usage
        check_command       nrpe_cpu
}

 


perfdata_format

 

format

'label'=value[UOM];[warn];[crit];[min];[max]

1. space separated list of label/value pairs

2. the single quotes for the label are optional. Required if spaces, = or ' are in the label

3. value, min and max are in class [-0-9.] (digits, optional minus sign and decimal point) and must all use the same UOM

4. UOM (unit of measurement) is one of:
    no unit specified - assume a number (int or float) of things (eg, users, processes, load averages)
    s - seconds (also us, ms)
    % - percentage
    B - bytes (also KB, MB, GB, TB)
    c - a continuous counter (such as bytes transmitted on an interface)

e.g.

WARNING: physical = 3.434GB|'physical'=3.43408GB;3.19965;3.5996;0;3.99956 'physical %'=86%;80;90;0;100

https://docs.pnp4nagios.org/pnp-0.6/perfdata_format