5 check_bacula.pl -- check for bacula status
10 -C|--Client=ss List of clients
11 -g|--group=ss List of groups
13 -l|--level=F/I/D Specify job level
15 -w|--warning=i warning threshold (jobs)
16 -c|--critical=i critical threshold (jobs)
18 -S|--Storage=ss List of SDs to test
20 -s|--scratch=i threshold scratch number
21 -m|--mediatype=ss Media type to check for scratch
23 -R|--Running=i Test for maximum running jobs for a period (in mins)
24 -F|--Failed=i Test for failed jobs after a period (in hours)
29 - if more than 10 jobs are running for clients c1 and c2 for 1 minute
30 check_bacula.pl -R 1 -C c1 -C c2 -w 10 -c 15
32 - if more than 10 jobs are running for group g1 for 2 minutes
33 check_bacula.pl -R 2 -g g1 -w 10 -c 15
35 - if more than 10 jobs have failed or been canceled in the last 120 hours for group g1
36 check_bacula.pl -F 120 -g g1 -w 10 -c 15
38 - if the S1_LTO1 and S1_LTO2 storage daemons respond to the status command
39 check_bacula.pl -S S1_LTO1 -S S1_LTO2
41 - if the scratch pool contains at least 2 volumes with mediatype Tape%
42 check_bacula.pl -s 2 -m Tape%
44 You can mix all options
46 check_bacula.pl -g g1 -w 10 -c 15 -S S1_LTO1 -s 2 -m Tape%
48 - if we have more than 10 jobs in error in the last 20 hours or running for 2 minutes
49 check_bacula.pl -R 2 -F 20 -w 10 -c 15
53 Bweb - A Bacula web interface
54 Bacula® - The Network Backup Solution
56 Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
58 The main author of Bweb is Eric Bollengier.
59 The main author of Bacula is Kern Sibbald, with contributions from
60 many others, a complete list can be found in the file AUTHORS.
61 This program is Free Software; you can redistribute it and/or
62 modify it under the terms of version three of the GNU Affero General Public
63 License as published by the Free Software Foundation and included
66 This program is distributed in the hope that it will be useful, but
67 WITHOUT ANY WARRANTY; without even the implied warranty of
68 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
69 General Public License for more details.
71 You should have received a copy of the GNU Affero General Public License
72 along with this program; if not, write to the Free Software
73 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
76 Bacula® is a registered trademark of Kern Sibbald.
77 The licensor of Bacula is the Free Software Foundation Europe
78 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
79 Switzerland, email:ftf@fsfeurope.org.
87 use POSIX qw/strftime/;
88 use Getopt::Long qw/:config no_ignore_case/;
# Option storage and defaults. $config_file starts from the package-level
# default $Bweb::config_file.
91 my $config_file = $Bweb::config_file;
92 my (@client, @group, $help, $query, $verbose, @msg, @storage);
96 my $mediatype='%'; # default LIKE pattern for the scratch check: matches every media type
97 my $nb_scratch; # scratch volume threshold (--scratch)
103 my $timeout=50; # timeout, in seconds, for the storage status command
# Parse the command line; if parsing fails, print the usage and exit with 2.
# "Runing:45" and "Failed:12" take an optional integer and fall back to 45
# and 12 when the option is given without a value.
# NOTE(review): the option is declared as "Runing" while the usage text
# advertises --Running. "Running" is not a prefix of "Runing", so the long
# form as documented would presumably be rejected (-R still matches) --
# confirm, and consider declaring "Running|Runing:45" instead.
105 GetOptions("Client=s@" => \@client,
106 "group=s@" => \@group,
107 "scratch=i" => \$nb_scratch,
108 "warning=i" => \$warn,
109 "critical=i"=> \$crit,
110 "verbose" => \$verbose,
111 "timeout=i" => \$timeout,
112 "Storage=s@"=> \@storage,
113 "mediatype=s"=> \$mediatype,
114 "Runing:45" => \$max_run,
115 "Failed:12" => \$test_failed,
116 "level=s" => \$level,
118 || Pod::Usage::pod2usage(-exitval => 2, -verbose => 1) ;
# Explicit help request: print the usage text.
120 Pod::Usage::pod2usage(-verbose => 1) if ( $help ) ;
# Build the Bweb configuration and main object from $config_file, then fetch
# a bconsole handle and give it the --timeout value.
# NOTE(review): "new Class(...)" is indirect object syntax; Class->new(...)
# is the unambiguous spelling.
122 my $conf = new Bweb::Config(config_file => $config_file);
124 my $bweb = new Bweb(info => $conf);
126 my $b = $bweb->get_bconsole();
127 $b->{timeout} = $timeout;
# Publish the client, group and level selections as CGI parameters before
# calling get_param().
# NOTE(review): presumably get_param() reads these CGI parameters to build
# $where -- confirm in Bweb.
129 CGI::param(-name=> 'client',-value => \@client);
130 CGI::param(-name=> 'client_group', -value => \@group);
131 CGI::param(-name=> 'level', -value => $level);
133 my ($where, undef) = $bweb->get_param(qw/clients client_groups level/);
# JOIN fragment interpolated into the Job queries when filtering by client.
# NOTE(review): presumably only set when clients were given -- confirm.
139 $c_filter = " JOIN Client USING (ClientId) ";
# JOIN fragments interpolated into the Job queries when filtering by group.
# NOTE(review): presumably only set when groups were given -- confirm.
143 $g_filter = " JOIN client_group_member USING (ClientId) " .
144 " JOIN client_group USING (client_group_id) ";
147 ################################################################
148 # Check whether too many jobs are running or just created
149 # (JobStatus IN ('R', 'C')). $max_run is expressed in minutes:
# the cutoff $trig is $max_run*60 seconds before now, and the message
# reports the period with an "m" suffix.
# When the count reaches $warn, $ret is raised to at least 1 without
# lowering a value that is already higher.
152 my $trig = time - $max_run*60;
155 SELECT count(1) AS nb
156 FROM Job $c_filter $g_filter
158 WHERE JobStatus IN ('R', 'C')
163 $res = $bweb->dbh_selectrow_hashref($query);
167 push @msg, "$nb jobs are running (${max_run}m)";
169 } elsif ($nb >= $warn) {
170 push @msg, "$nb jobs are running (${max_run}m)";
171 $ret = ($ret>1)?$ret:1;
176 ################################################################
177 # Check for failed jobs: count the jobs whose JobStatus is one of
# 'E', 'e', 'f', 'A' and whose JobTDate is later than $since.
# $test_failed is expressed in hours: $since is $test_failed*60*60 seconds
# before now, and the message reports the period with an "h" suffix.
# When the count reaches $warn, $ret is raised to at least 1 without
# lowering a value that is already higher.
180 my $since = time - $test_failed*60*60;
183 SELECT count(1) AS nb
184 FROM Job $c_filter $g_filter
186 WHERE JobStatus IN ('E','e','f','A')
188 AND JobTDate > $since
191 $res = $bweb->dbh_selectrow_hashref($query);
195 push @msg, "$nb jobs are in error (${test_failed}h)";
197 } elsif ($nb >= $warn) {
198 push @msg, "$nb jobs are in error (${test_failed}h)";
199 $ret = ($ret>1)?$ret:1;
204 ################################################################
205 # Check every requested storage with the bconsole "status storage" command.
# A storage is reported as failing when the command returns nothing, or when
# its output contains neither "Attr spooling" nor "JobId".
# NOTE(review): $st comes from the command line and is interpolated into the
# console command unescaped; a name containing a double quote would break the
# quoting.
207 foreach my $st (@storage) {
209 my $out = $b->send_cmd("status storage=\"$st\"");
210 if (!$out || $out !~ /Attr spooling|JobId/) {
211 push @msg, "timeout ($timeout s) or bad response on status storage $st";
216 ################################################################
217 # Check for Scratch volumes: count the media of the 'Scratch' pool whose
# MediaType matches the LIKE pattern $mediatype, keyed by media type, and
# report every media type that has fewer than $nb_scratch volumes. When the
# query returns no row at all, the pattern itself is reported as having no
# scratch volume left.
# NOTE(review): $mediatype comes from the command line and is interpolated
# directly into the SQL text -- consider quoting it through the database
# handle or using a placeholder.
221 SELECT MediaType AS mediatype, count(MediaId) AS nb
222 FROM Media JOIN Pool USING (PoolId)
223 WHERE Pool.Name = 'Scratch'
224 AND Media.MediaType LIKE '$mediatype'
229 $res = $bweb->dbh_selectall_hashref($query, 'mediatype');
230 if ($res && keys %$res) {
231 foreach my $k (keys %$res) {
232 if ($res->{$k}->{nb} < $nb_scratch) {
233 push @msg, "no more scratch for $k ($res->{$k}->{nb})";
237 } else { # query doesn't report anything...
238 push @msg, "no more scratch for $mediatype";
243 ################################################################
# Final report: print a single status line. The first branch prints the OK
# line; $ret == 1 prints WARNING and the remaining branch prints CRITICAL,
# each followed by the collected messages joined with ", ".
247 print "OK - All checks ok\n";
248 } elsif ($ret == 1) {
249 print "WARNING - ", join(", ", @msg), "\n";
251 print "CRITICAL - ", join(", ", @msg), "\n";