2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2015 Kern Sibbald
5 Copyright (C) 2000-2014 Free Software Foundation Europe e.V.
7 The original author of Bacula is Kern Sibbald, with contributions
8 from many others, a complete list can be found in the file AUTHORS.
10 You may use this file and others of this release according to the
11 license defined in the LICENSE file, which includes the Affero General
12 Public License, v3.0 ("AGPLv3") and some additional permissions and
13 terms pursuant to its AGPLv3 Section 7.
15 This notice must be preserved when any source code is
16 conveyed and/or propagated.
18 Bacula(R) is a registered trademark of Kern Sibbald.
21 * Subroutines to handle waiting for operator intervention
22 * or waiting for a Device to be released
24 * Code for wait_for_sysop() pulled from askdir.c
26 * Kern Sibbald, March 2005
31 #include "bacula.h" /* pull in global headers */
32 #include "stored.h" /* pull in Storage Daemon headers */
/* Debug level passed as the first argument to most Dmsg calls in this file. */
34 const int dbglvl = 400;
37 * Wait for SysOp to mount a tape on a specific device
39 * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
/*
 * wait_for_sysop() -- sleep on dcr->dev in bounded slices, waking to send
 * heartbeats, until one of the checks in the loop body fires or the job
 * is canceled.
 *
 * dcr: device control record; the visible lines only read dcr->dev.
 *
 * Returns (per the comment preceding this function): W_ERROR, W_TIMEOUT,
 * W_POLL, W_MOUNT or W_WAKE. Only the W_ERROR and W_WAKE assignments are
 * visible in this listing.
 *
 * NOTE(review): this listing is abridged -- the embedded source line
 * numbers skip values, so braces, the break/return statements, and the
 * declarations or assignments of stat, add_wait, unmounted, jcr, tv, tz,
 * now, start and be are not shown. The comments added below describe only
 * what the visible lines do; confirm the rest against the full source.
 */
41 int wait_for_sysop(DCR *dcr)
45 struct timespec timeout;
46 time_t last_heartbeat = 0;
47 time_t first_start = time(NULL);
51 DEVICE *dev = dcr->dev;
55 Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked());
58 * Since we want to mount a tape, make sure current one is
59 * not marked as using this drive.
63 unmounted = dev->is_device_unmounted();
66 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
67 * HB_TIME seconds and send a heartbeat to the FD and the Director
68 * to keep stateful firewalls from closing them down while waiting
/* First sleep length: the remaining wait, capped by the heartbeat
 * interval when one is configured. */
71 add_wait = dev->rem_wait_sec;
72 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
73 add_wait = me->heartbeat_interval;
75 /* If the user did not unmount the tape and we are polling, ensure
76 * that we poll at the correct interval.
78 if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval) {
79 add_wait = dev->vol_poll_interval;
83 Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked());
84 dev->dev_prev_blocked = dev->blocked();
85 dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
/* Main wait loop: the loop condition ends it when the job is canceled.
 * NOTE(review): the other exits (break statements) are not visible here. */
88 for ( ; !job_canceled(jcr); ) {
89 time_t now, start, total_waited;
/* Absolute deadline for the timed wait: current time plus add_wait seconds. */
91 gettimeofday(&tv, &tz);
92 timeout.tv_nsec = tv.tv_usec * 1000;
93 timeout.tv_sec = tv.tv_sec + add_wait;
95 Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
96 dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait);
99 /* Wait required time */
100 stat = dev->next_vol_timedwait(&timeout);
102 Dmsg2(dbglvl, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
103 dev->print_blocked());
/* Elapsed-time accounting: total_waited is measured from function entry
 * (first_start); rem_wait_sec is reduced by the time since start.
 * NOTE(review): the assignments of now and start are not visible here. */
105 total_waited = now - first_start;
106 dev->rem_wait_sec -= (now - start);
/* Heartbeats are signalled on the file and director sockets, when those
 * pointers are non-null, once at least heartbeat_interval seconds have
 * passed since the previous heartbeat. */
108 /* Note, this always triggers the first time. We want that. */
109 if (me->heartbeat_interval) {
110 if (now - last_heartbeat >= me->heartbeat_interval) {
111 /* send heartbeats */
112 if (jcr->file_bsock) {
113 jcr->file_bsock->signal(BNET_HEARTBEAT);
114 Dmsg0(dbglvl, "Send heartbeat to FD.\n");
116 if (jcr->dir_bsock) {
117 jcr->dir_bsock->signal(BNET_HEARTBEAT);
119 last_heartbeat = now;
/* EINVAL from the timed wait is reported through Jmsg1 with M_FATAL and
 * converted to W_ERROR. */
123 if (stat == EINVAL) {
125 Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"), be.bstrerror(stat));
126 stat = W_ERROR; /* error */
131 * Continue waiting if operator is labeling volumes
133 if (dev->blocked() == BST_WRITING_LABEL) {
137 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
138 Dmsg0(dbglvl, "Exceed wait time.\n");
144 * Check if user unmounted the device while we were waiting
146 unmounted = dev->is_device_unmounted();
/* Poll case: device not unmounted, polling enabled, and at least
 * vol_poll_interval seconds elapsed since entry; sets dev->poll. */
148 if (!unmounted && dev->vol_poll_interval &&
149 (total_waited >= dev->vol_poll_interval)) {
150 Dmsg1(dbglvl, "poll return in wait blocked=%s\n", dev->print_blocked());
151 dev->poll = true; /* returning a poll event */
156 * Check if user mounted the device while we were waiting
158 if (dev->blocked() == BST_MOUNT) { /* mount request ? */
159 Dmsg0(dbglvl, "Mounted return.\n");
165 * If we did not timeout, then some event happened, so
166 * return to check if state changed.
/* Any status other than ETIMEDOUT is treated as an explicit wakeup and
 * mapped to W_WAKE. */
168 if (stat != ETIMEDOUT) {
170 Dmsg2(dbglvl, "Wake return. stat=%d. ERR=%s\n", stat, be.bstrerror(stat));
171 stat = W_WAKE; /* someone woke us */
176 * At this point, we know we woke up because of a timeout,
177 * that was due to a heartbeat, because any other reason would
178 * have caused us to return, so update the wait counters and continue.
/* Next sleep length: the updated remaining wait, again capped by the
 * heartbeat interval when one is configured. */
180 add_wait = dev->rem_wait_sec;
181 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
182 add_wait = me->heartbeat_interval;
184 /* If the user did not unmount the tape and we are polling, ensure
185 * that we poll at the correct interval.
187 if (!unmounted && dev->vol_poll_interval &&
188 add_wait > dev->vol_poll_interval - total_waited) {
189 add_wait = dev->vol_poll_interval - total_waited;
197 dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */
198 Dmsg1(dbglvl, "set %s\n", dev->print_blocked());
200 Dmsg1(dbglvl, "Exit blocked=%s\n", dev->print_blocked());
207 * Wait for any device to be released, then we return, so
208 * higher level code can rescan possible devices. Since there
209 * could be a job waiting for a drive to free up, we wait a maximum
210 * of 1 minute then retry just in case a broadcast was lost, and
211 * we return to rescan the devices.
213 * Returns: true if a device has changed state
214 * false if the total wait time has expired.
/*
 * wait_for_any_device() -- wait up to max_wait_time (60) seconds on the
 * wait_device_release condition, with device_release_mutex held around
 * the wait via P()/V().
 *
 * jcr:     job whose JobId and Job name appear in the periodic message.
 * retries: caller-owned counter (passed by reference), incremented on
 *          every call; whenever it reaches a multiple of 5 an M_MOUNT
 *          job message is issued.
 *
 * Returns: per the comment preceding this function, true if a device
 * changed state and false if the wait expired.
 * NOTE(review): this listing is abridged -- the code that derives ok from
 * stat, the return statement, and the declarations of stat, ok, tv, tz
 * and ed1 are not visible here; confirm against the full source.
 */
216 bool wait_for_any_device(JCR *jcr, int &retries)
220 struct timespec timeout;
223 const int max_wait_time = 1 * 60; /* wait 1 minute */
226 Dmsg0(dbglvl, "Enter wait_for_any_device\n");
227 P(device_release_mutex);
229 if (++retries % 5 == 0) {
230 /* Print message every 5 minutes */
231 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
232 edit_uint64(jcr->JobId, ed1), jcr->Job);
/* Absolute deadline for the timed wait: current time plus max_wait_time seconds. */
235 gettimeofday(&tv, &tz);
236 timeout.tv_nsec = tv.tv_usec * 1000;
237 timeout.tv_sec = tv.tv_sec + max_wait_time;
239 Dmsg0(dbglvl, "Going to wait for a device.\n");
/* pthread_cond_timedwait releases the mutex while blocked and reacquires
 * it before returning, so V() below always runs with the mutex held. */
241 /* Wait required time */
242 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
243 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
245 V(device_release_mutex);
246 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
251 * Wait for a specific device to be released
252 * We wait a maximum of 1 minute then
253 * retry just in case a broadcast was lost.
255 * Returns: true if the device has changed state
256 * false if the total wait time has expired.
/*
 * wait_for_device() -- wait up to max_wait_time (60) seconds on the
 * wait_device_release condition, with device_release_mutex held around
 * the wait via P()/V(). The wait itself uses the same global condition
 * and mutex as wait_for_any_device(); dcr->dev is only used for tracing
 * and for the periodic message in the visible lines.
 *
 * dcr:     device control record; dcr->dev and dcr->VolumeName are read.
 * retries: caller-owned counter (passed by reference), incremented on
 *          every call; whenever it reaches a multiple of 5 an M_MOUNT
 *          job message naming the device is issued.
 *
 * Returns: per the comment preceding this function, true if the device
 * changed state and false if the wait expired.
 * NOTE(review): this listing is abridged -- the code that derives ok from
 * stat, the return statement, and the declarations of jcr, stat, ok, tv,
 * tz and ed1 are not visible here; confirm against the full source.
 */
258 bool wait_for_device(DCR *dcr, int &retries)
262 struct timespec timeout;
264 DEVICE *dev = dcr->dev;
267 const int max_wait_time = 1 * 60; /* wait 1 minute */
/* NOTE(review): this trace uses literal level 40 rather than dbglvl (400)
 * like the other Dmsg calls in this function -- confirm this is intended. */
270 Dmsg3(40, "Enter wait_for_device. busy=%d dcrvol=%s devvol=%s\n",
271 dev->is_busy(), dcr->VolumeName, dev->getVolCatName());
273 P(device_release_mutex);
275 if (++retries % 5 == 0) {
276 /* Print message every 5 minutes */
277 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting device %s.\n"),
278 edit_uint64(jcr->JobId, ed1), jcr->Job, dcr->dev->print_name());
/* Absolute deadline for the timed wait: current time plus max_wait_time seconds. */
281 gettimeofday(&tv, &tz);
282 timeout.tv_nsec = tv.tv_usec * 1000;
283 timeout.tv_sec = tv.tv_sec + max_wait_time;
285 Dmsg0(dbglvl, "Going to wait for a device.\n");
/* pthread_cond_timedwait releases the mutex while blocked and reacquires
 * it before returning, so V() below always runs with the mutex held. */
287 /* Wait required time */
288 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
289 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
291 V(device_release_mutex);
292 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
299 * The jcr timers are used for waiting on any device *
300 * Returns: true if time doubled
301 * false if max time expired
303 static bool double_jcr_wait_time(JCR *jcr)
305 jcr->wait_sec *= 2; /* double wait time */
306 if (jcr->wait_sec > jcr->max_wait) { /* but not longer than maxtime */
307 jcr->wait_sec = jcr->max_wait;
310 jcr->rem_wait_sec = jcr->wait_sec;
311 if (jcr->num_wait >= jcr->max_num_wait) {