2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2017 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Subroutines to handle waiting for operator intervention
21 * or waiting for a Device to be released
23 * Code for wait_for_sysop() pulled from askdir.c
25 * Kern Sibbald, March 2005
29 #include "bacula.h" /* pull in global headers */
30 #include "stored.h" /* pull in Storage Daemon headers */
/* Debug level passed as the first argument to most Dmsg calls in this file */
32 const int dbglvl = 400;
35 * Wait for SysOp to mount a tape on a specific device
37 * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
/*
 * wait_for_sysop
 *
 *  Sleeps on dcr->dev by calling dev->next_vol_timedwait() in a loop that
 *   keeps running while job_canceled(jcr) is false. Each sleep slice is
 *   limited to the smallest of dev->rem_wait_sec, me->heartbeat_interval
 *   (when non-zero) and, when the device is not unmounted,
 *   dev->vol_poll_interval (when non-zero). After every wake-up the
 *   remaining wait is reduced, heartbeats are signalled on the job's
 *   sockets, and the device state is examined for an exit condition.
 *
 *  dcr - supplies the device through dcr->dev.
 *
 *  The visible lines set stat to W_ERROR (timed wait returned EINVAL)
 *   and W_WAKE (wake-up that was not a timeout).
 *
 *  NOTE(review): this listing looks extraction-damaged. Brace-only lines
 *   and several short statements are not visible, including the
 *   declarations of tv, tz, stat, add_wait, unmounted and jcr, the
 *   assignments to now and start, and the return statement. Confirm
 *   against the complete file before relying on the notes below.
 */
39 int wait_for_sysop(DCR *dcr)
43 struct timespec timeout;
44 time_t last_heartbeat = 0;
/* Entry time; total_waited in the loop is measured from here */
45 time_t first_start = time(NULL);
49 DEVICE *dev = dcr->dev;
53 Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked());
56 * Since we want to mount a tape, make sure current one is
57 * not marked as using this drive.
/* NOTE(review): the statement that this text describes is not visible in this listing */
61 unmounted = dev->is_device_unmounted();
/* The code below uses me->heartbeat_interval as the heartbeat period */
64 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
65 * HB_TIME seconds and send a heartbeat to the FD and the Director
66 * to keep stateful firewalls from closing them down while waiting
/* First sleep slice: the remaining wait, capped by the heartbeat interval */
69 add_wait = dev->rem_wait_sec;
70 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
71 add_wait = me->heartbeat_interval;
73 /* If the user did not unmount the tape and we are polling, ensure
74 * that we poll at the correct interval.
76 if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval) {
77 add_wait = dev->vol_poll_interval;
81 Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked());
82 dev->dev_prev_blocked = dev->blocked();
83 dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
/*
 * The blocked state read just above was saved in dev->dev_prev_blocked;
 *  it is written back with set_blocked() after the loop.
 */
86 for ( ; !job_canceled(jcr); ) {
87 time_t now, start, total_waited;
/* Build the absolute deadline: current time plus add_wait seconds */
89 gettimeofday(&tv, &tz);
90 timeout.tv_nsec = tv.tv_usec * 1000;
91 timeout.tv_sec = tv.tv_sec + add_wait;
93 Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
94 dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait);
97 /* Wait required time */
98 stat = dev->next_vol_timedwait(&timeout);
100 Dmsg2(dbglvl, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
101 dev->print_blocked());
/*
 * Bookkeeping: total_waited is time since function entry, and
 *  rem_wait_sec shrinks by the length of the slice just slept.
 */
103 total_waited = now - first_start;
104 dev->rem_wait_sec -= (now - start);
106 /* Note, this always triggers the first time. We want that. */
107 if (me->heartbeat_interval) {
108 if (now - last_heartbeat >= me->heartbeat_interval) {
109 /* send heartbeats */
110 if (jcr->file_bsock) {
111 jcr->file_bsock->signal(BNET_HEARTBEAT);
112 Dmsg0(dbglvl, "Send heartbeat to FD.\n");
114 if (jcr->dir_bsock) {
115 jcr->dir_bsock->signal(BNET_HEARTBEAT);
117 last_heartbeat = now;
/* EINVAL from the timed wait is reported as a fatal job message */
121 if (stat == EINVAL) {
123 Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"), be.bstrerror(stat));
124 stat = W_ERROR; /* error */
129 * Continue waiting if operator is labeling volumes
131 if (dev->blocked() == BST_WRITING_LABEL) {
135 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
136 Dmsg0(dbglvl, "Exceed wait time.\n");
142 * Check if user unmounted the device while we were waiting
144 unmounted = dev->is_device_unmounted();
/* Poll event: device still mounted and a full poll interval has elapsed since entry */
146 if (!unmounted && dev->vol_poll_interval &&
147 (total_waited >= dev->vol_poll_interval)) {
148 Dmsg1(dbglvl, "Set poll=true return in wait blocked=%s\n", dev->print_blocked());
149 dev->poll = true; /* returning a poll event */
154 * Check if user mounted the device while we were waiting
156 if (dev->blocked() == BST_MOUNT) { /* mount request ? */
157 Dmsg0(dbglvl, "Mounted return.\n");
163 * If we did not timeout, then some event happened, so
164 * return to check if state changed.
166 if (stat != ETIMEDOUT) {
168 Dmsg2(dbglvl, "Wake return. stat=%d. ERR=%s\n", stat, be.bstrerror(stat));
169 stat = W_WAKE; /* someone woke us */
174 * At this point, we know we woke up because of a timeout,
175 * that was due to a heartbeat, because any other reason would
176 * have caused us to return, so update the wait counters and continue.
/* Next slice: same capping as before the loop, using the time left in the poll interval */
178 add_wait = dev->rem_wait_sec;
179 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
180 add_wait = me->heartbeat_interval;
182 /* If the user did not unmount the tape and we are polling, ensure
183 * that we poll at the correct interval.
185 if (!unmounted && dev->vol_poll_interval &&
186 add_wait > dev->vol_poll_interval - total_waited) {
187 add_wait = dev->vol_poll_interval - total_waited;
195 dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */
196 Dmsg1(dbglvl, "set %s\n", dev->print_blocked());
198 Dmsg2(dbglvl, "Exit blocked=%s poll=%d\n", dev->print_blocked(), dev->poll);
205 * Wait for any device to be released, then we return, so
206 * higher level code can rescan possible devices. Since there
207 * could be a job waiting for a drive to free up, we wait a maximum
208 * of 1 minute then retry just in case a broadcast was lost, and
209 * we return to rescan the devices.
211 * Returns: true if a device has changed state
212 * false if the total wait time has expired.
/*
 * wait_for_any_device
 *
 *  Takes device_release_mutex, waits on the wait_device_release condition
 *   until it is signalled or max_wait_time seconds have passed, then
 *   releases the mutex.
 *
 *  jcr     - job whose JobId and Job name appear in the waiting message.
 *  retries - caller's counter, passed by reference and incremented on
 *             every call; each time it reaches a multiple of 5 an
 *             M_MOUNT job message is issued.
 *
 *  NOTE(review): this listing looks extraction-damaged. The declarations
 *   of tv, tz, stat, ok and ed1, any assignment to ok, and the return
 *   statement are not visible, so the return value cannot be confirmed
 *   from here.
 */
214 bool wait_for_any_device(JCR *jcr, int &retries)
218 struct timespec timeout;
221 const int max_wait_time = 1 * 60; /* wait 1 minute */
224 Dmsg0(dbglvl, "Enter wait_for_any_device\n");
225 P(device_release_mutex);
/* retries is bumped once per call, so this fires on every 5th call */
227 if (++retries % 5 == 0) {
228 /* Print message every 5 minutes */
229 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
230 edit_uint64(jcr->JobId, ed1), jcr->Job);
/*
 * Absolute deadline for the timed wait: current time plus max_wait_time.
 * NOTE(review): POSIX leaves gettimeofday unspecified when the second
 *  argument is not a null pointer - consider passing NULL.
 */
233 gettimeofday(&tv, &tz);
234 timeout.tv_nsec = tv.tv_usec * 1000;
235 timeout.tv_sec = tv.tv_sec + max_wait_time;
237 Dmsg0(dbglvl, "Going to wait for a device.\n");
239 /* Wait required time */
/* In the visible lines stat is only used by the debug message that follows */
240 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
241 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
243 V(device_release_mutex);
244 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
249 * Wait for a specific device to be released
250 * We wait a maximum of 1 minute then
251 * retry just in case a broadcast was lost.
253 * Returns: true if the device has changed state
254 * false if the total wait time has expired.
/*
 * wait_for_device
 *
 *  Takes device_release_mutex, waits on the wait_device_release condition
 *   until it is signalled or max_wait_time seconds have passed, then
 *   releases the mutex. The wait uses the same shared condition variable
 *   as wait_for_any_device; dcr->dev is only used for messages in the
 *   visible lines.
 *
 *  dcr     - supplies the device (dcr->dev) and the volume name that are
 *             printed in the debug and job messages.
 *  retries - caller's counter, passed by reference and incremented on
 *             every call; each time it reaches a multiple of 5 an
 *             M_MOUNT job message is issued.
 *
 *  NOTE(review): this listing looks extraction-damaged. The declarations
 *   of tv, tz, stat, ok, ed1 and jcr, any assignment to ok, and the
 *   return statement are not visible, so the return value cannot be
 *   confirmed from here.
 */
256 bool wait_for_device(DCR *dcr, int &retries)
260 struct timespec timeout;
262 DEVICE *dev = dcr->dev;
265 const int max_wait_time = 1 * 60; /* wait 1 minute */
/*
 * NOTE(review): this call uses the literal level 40, while the other
 *  Dmsg calls in this function use dbglvl - confirm that is intended.
 */
268 Dmsg3(40, "Enter wait_for_device. busy=%d dcrvol=%s devvol=%s\n",
269 dev->is_busy(), dcr->VolumeName, dev->getVolCatName());
271 P(device_release_mutex);
/* retries is bumped once per call, so this fires on every 5th call */
273 if (++retries % 5 == 0) {
274 /* Print message every 5 minutes */
275 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting device %s.\n"),
276 edit_uint64(jcr->JobId, ed1), jcr->Job, dcr->dev->print_name());
/*
 * Absolute deadline for the timed wait: current time plus max_wait_time.
 * NOTE(review): POSIX leaves gettimeofday unspecified when the second
 *  argument is not a null pointer - consider passing NULL.
 */
279 gettimeofday(&tv, &tz);
280 timeout.tv_nsec = tv.tv_usec * 1000;
281 timeout.tv_sec = tv.tv_sec + max_wait_time;
283 Dmsg0(dbglvl, "Going to wait for a device.\n");
285 /* Wait required time */
/* In the visible lines stat is only used by the debug message that follows */
286 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
287 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
289 V(device_release_mutex);
290 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
296 * This routine initializes the device wait timers
/*
 * init_device_wait_timers
 *
 *  Resets the wait timers on both the device (dcr->dev) and the job
 *   record to the same hard-coded defaults: min_wait of one hour,
 *   max_wait of 24 hours, max_num_wait of 9, with wait_sec and
 *   rem_wait_sec both starting at min_wait.
 *
 *  dcr - supplies the device whose timers are reset.
 *
 *  NOTE(review): jcr is used below but its declaration is not visible in
 *   this listing (presumably taken from dcr - confirm). Other short
 *   statements may also be missing.
 */
298 void init_device_wait_timers(DCR *dcr)
300 DEVICE *dev = dcr->dev;
303 /* ******FIXME******* put these on config variables */
/* Times are in seconds: 60 * 60 is one hour, 24 * 60 * 60 is one day */
304 dev->min_wait = 60 * 60;
305 dev->max_wait = 24 * 60 * 60;
306 dev->max_num_wait = 9; /* 5 waits =~ 1 day, then 1 day at a time */
307 dev->wait_sec = dev->min_wait;
308 dev->rem_wait_sec = dev->wait_sec;
/* Same defaults applied to the job's own wait timers */
312 jcr->min_wait = 60 * 60;
313 jcr->max_wait = 24 * 60 * 60;
314 jcr->max_num_wait = 9; /* 5 waits =~ 1 day, then 1 day at a time */
315 jcr->wait_sec = jcr->min_wait;
316 jcr->rem_wait_sec = jcr->wait_sec;
/*
 * init_jcr_device_wait_timers
 *
 *  Resets only the job's wait timers to the hard-coded defaults:
 *   min_wait of one hour, max_wait of 24 hours, max_num_wait of 9, with
 *   wait_sec and rem_wait_sec both starting at min_wait. These are the
 *   same values init_device_wait_timers writes to the job record.
 *
 *  jcr - job record whose timers are reset.
 *
 *  NOTE(review): this listing looks extraction-damaged; short statements
 *   after the last visible line may be missing.
 */
321 void init_jcr_device_wait_timers(JCR *jcr)
323 /* ******FIXME******* put these on config variables */
/* Times are in seconds: 60 * 60 is one hour, 24 * 60 * 60 is one day */
324 jcr->min_wait = 60 * 60;
325 jcr->max_wait = 24 * 60 * 60;
326 jcr->max_num_wait = 9; /* 5 waits =~ 1 day, then 1 day at a time */
327 jcr->wait_sec = jcr->min_wait;
328 jcr->rem_wait_sec = jcr->wait_sec;
334 * The dev timers are used for waiting on a particular device
336 * Returns: true if time doubled
337 * false if max time expired
339 bool double_dev_wait_time(DEVICE *dev)
341 dev->wait_sec *= 2; /* double wait time */
342 if (dev->wait_sec > dev->max_wait) { /* but not longer than maxtime */
343 dev->wait_sec = dev->max_wait;
346 dev->rem_wait_sec = dev->wait_sec;
347 if (dev->num_wait >= dev->max_num_wait) {