5 check_bacula.pl -- check for bacula status
10 -C|--Client=ss List of clients
11 -g|--group=ss List of groups
13 -l|--level=F/I/D Specify job level
15 -w|--warning=i warning threshold (jobs)
16 -c|--critical=i critical threshold (jobs)
18 -S|--Storage=ss List of SDs to test
20 -s|--scratch=i threshold scratch number
21 -m|--mediatype=ss Media type to check for scratch
23 -R|--Running=i Test for maximum running jobs for a period (in mins)
24 -F|--Failed=i Test for failed jobs after a period (in hours)
29 - if more than 10 jobs are running for client c1 and c2 for 1 hour
30 check_bacula.pl -R 60 -C c1 -C c2 -w 10 -c 15
32 - if more than 10 jobs are running for group g1 for 2 hours
33 check_bacula.pl -R 120 -g g1 -w 10 -c 15
35 - if more than 10 jobs are failed or canceled for 2 hours for group g1
36 check_bacula.pl -F 2 -g g1 -w 10 -c 15
38 - if S1_LTO1 and S1_LTO2 storage daemons are responding to status cmd
39 check_bacula.pl -S S1_LTO1 -S S1_LTO2
41 - if the scratch pool contains at least 2 volumes with mediatype Tape%
42 check_bacula.pl -s 2 -m Tape%
44 You can mix all options
46 check_bacula.pl -g g1 -w 10 -c 15 -S S1_LTO1 -s 2 -m Tape%
48 - if more than 10 jobs have been running for 2 hours or have failed in the last 20 hours
49 check_bacula.pl -R 120 -F 20 -w 10 -c 15
53 Bweb - A Bacula web interface
54 Bacula® - The Network Backup Solution
56 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
58 The main author of Bweb is Eric Bollengier.
59 The main author of Bacula is Kern Sibbald, with contributions from
60 many others, a complete list can be found in the file AUTHORS.
62 This program is Free Software; you can redistribute it and/or
63 modify it under the terms of version two of the GNU General Public
64 License as published by the Free Software Foundation plus additions
65 that are listed in the file LICENSE.
67 This program is distributed in the hope that it will be useful, but
68 WITHOUT ANY WARRANTY; without even the implied warranty of
69 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
70 General Public License for more details.
72 You should have received a copy of the GNU General Public License
73 along with this program; if not, write to the Free Software
74 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
77 Bacula® is a registered trademark of John Walker.
78 The licensor of Bacula is the Free Software Foundation Europe
79 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zurich,
80 Switzerland, email:ftf@fsfeurope.org.
88 use POSIX qw/strftime/;
89 use Getopt::Long qw/:config no_ignore_case/;
# Script-wide settings and the variables that command-line parsing fills in.
92 my $config_file = $Bweb::config_file; # configuration path taken from the Bweb package variable
93 my (@client, @group, $help, $query, $verbose, @msg, @storage); # @msg collects the problem descriptions pushed by each check
97 my $mediatype='%'; # SQL LIKE pattern for the scratch check; '%' matches every media type
98 my $nb_scratch; # scratch-volume threshold; stays undef unless --scratch is given
104 my $timeout=50; # timeout for the storage status command (reported with an "s" suffix in the failure message)
# Parse the command line. Getopt::Long is imported with no_ignore_case, so
# -C/--Client and -c/--critical (and -S/--Storage vs -s/--scratch) are distinct.
# "s@" options may be repeated and accumulate into the target array.
# "Running" and "Failed" take an optional integer: ":45" / ":12" is the value
# assigned when the switch is given without one.
# A parsing error prints the usage text and exits with status 2; --help prints
# the usage text as well.
106 GetOptions("Client=s@" => \@client,
107 "group=s@" => \@group,
108 "scratch=i" => \$nb_scratch,
109 "warning=i" => \$warn,
110 "critical=i"=> \$crit,
111 "verbose" => \$verbose,
112 "timeout=i" => \$timeout,
113 "Storage=s@"=> \@storage,
114 "mediatype=s"=> \$mediatype,
115 "Running|Runing:45" => \$max_run, # documented spelling --Running now accepted; the old misspelling stays as an alias
116 "Failed:12" => \$test_failed,
117 "level=s" => \$level,
119 || Pod::Usage::pod2usage(-exitval => 2, -verbose => 1) ;
121 Pod::Usage::pod2usage(-verbose => 1) if ( $help ) ;
# Build the Bweb objects and prepare the client/group/level restrictions used by the job queries.
123 my $conf = new Bweb::Config(config_file => $config_file); # configuration object for the selected config file
125 my $bweb = new Bweb(info => $conf);
127 my $b = $bweb->get_bconsole(); # console handle used for the storage status commands; NOTE(review): a lexical $b masks the $b that sort blocks rely on
128 $b->{timeout} = $timeout; # apply the --timeout value to the console handle
130 CGI::param(-name=> 'client',-value => \@client); # selections are stored as CGI parameters -- presumably so get_param() below can read them; confirm in Bweb
131 CGI::param(-name=> 'client_group', -value => \@group);
132 CGI::param(-name=> 'level', -value => $level);
134 my ($where, undef) = $bweb->get_param(qw/clients client_groups level/); # only the first returned value is kept
140 $c_filter = " JOIN Client USING (ClientId) "; # SQL join fragment spliced into the job-count queries
144 $g_filter = " JOIN client_group_member USING (ClientId) " .
145 " JOIN client_group USING (client_group_id) "; # group-membership join fragment, spliced into the same queries
148 ################################################################
149 # Running-jobs check: count the jobs that are running or just created
150 # (JobStatus 'R' or 'C'); the period given with $max_run is in minutes.
153 my $trig = time - $max_run*60; # cutoff epoch: $max_run is multiplied by 60, i.e. treated as minutes
156 SELECT count(1) AS nb
157 FROM Job $c_filter $g_filter
159 WHERE JobStatus IN ('R', 'C')
164 $res = $bweb->dbh_selectrow_hashref($query); # single result row carrying the count
168 push @msg, "$nb jobs are running (${max_run}m)"; # message of the branch tested before the warning threshold (its condition is not visible in this listing)
170 } elsif ($nb >= $warn) {
171 push @msg, "$nb jobs are running (${max_run}m)";
172 $ret = ($ret>1)?$ret:1; # raise the status to 1 without lowering a value already above 1
177 ################################################################
178 # Failed-jobs check: count jobs with JobStatus E, e, f or A newer than the cutoff ($test_failed is in hours)
181 my $since = time - $test_failed*60*60; # cutoff epoch: $test_failed is multiplied by 3600, i.e. treated as hours
184 SELECT count(1) AS nb
185 FROM Job $c_filter $g_filter
187 WHERE JobStatus IN ('E','e','f','A')
189 AND JobTDate > $since
192 $res = $bweb->dbh_selectrow_hashref($query); # single result row carrying the count
196 push @msg, "$nb jobs are in error (${test_failed}h)"; # message of the branch tested before the warning threshold (its condition is not visible in this listing)
198 } elsif ($nb >= $warn) {
199 push @msg, "$nb jobs are in error (${test_failed}h)";
200 $ret = ($ret>1)?$ret:1; # raise the status to 1 without lowering a value already above 1
205 ################################################################
206 # Storage check: send a "status storage" console command for every name given with --Storage
208 foreach my $st (@storage) {
210 my $out = $b->send_cmd("status storage=\"$st\""); # NOTE(review): $st is placed into the console command verbatim
211 if (!$out || $out !~ /Attr spooling|JobId/) { # empty output, or output containing neither marker, counts as a failure
212 push @msg, "timeout ($timeout s) or bad response on status storage $st";
217 ################################################################
218 # Scratch check: count the volumes of the 'Scratch' pool whose media type matches $mediatype
222 SELECT MediaType AS mediatype, count(MediaId) AS nb
223 FROM Media JOIN Pool USING (PoolId)
224 WHERE Pool.Name = 'Scratch'
225 AND Media.MediaType LIKE '$mediatype'
230 $res = $bweb->dbh_selectall_hashref($query, 'mediatype'); # rows keyed by the mediatype column (presumably DBI selectall_hashref semantics -- confirm); NOTE(review): $mediatype is interpolated into the SQL text as-is
231 if ($res && keys %$res) {
232 foreach my $k (keys %$res) {
233 if ($res->{$k}->{nb} < $nb_scratch) { # fewer volumes of this media type than the --scratch threshold
234 push @msg, "no more scratch for $k ($res->{$k}->{nb})";
238 } else { # the query returned no rows, so nothing in Scratch matches $mediatype
239 push @msg, "no more scratch for $mediatype";
244 ################################################################
248 print "OK - All checks ok\n"; # output of the first branch (its condition is not visible in this listing)
249 } elsif ($ret == 1) {
250 print "WARNING - ", join(", ", @msg), "\n"; # status 1: every collected message on one line
252 print "CRITICAL - ", join(", ", @msg), "\n"; # remaining case: same message list with the CRITICAL prefix