2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2016 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Subroutines to handle waiting for operator intervention
21 * or waiting for a Device to be released
23 * Code for wait_for_sysop() pulled from askdir.c
25 * Kern Sibbald, March 2005
30 #include "bacula.h" /* pull in global headers */
31 #include "stored.h" /* pull in Storage Daemon headers */
/* Debug level passed as the first argument to most Dmsg calls in this file. */
33 const int dbglvl = 400;
36 * Wait for SysOp to mount a tape on a specific device
38 * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
/*
 * wait_for_sysop: sleep on dcr->dev (via dev->next_vol_timedwait) until
 * something needs attention, waking up periodically in between.
 *
 * Before sleeping, the device's current blocked state is saved in
 * dev->dev_prev_blocked and the device is flagged BST_WAITING_FOR_SYSOP;
 * near the end the saved state is put back with set_blocked().
 * Each sleep lasts add_wait seconds: the remaining wait
 * (dev->rem_wait_sec), capped by me->heartbeat_interval when that is
 * non-zero and, while the device is not unmounted, by
 * dev->vol_poll_interval when that is non-zero.
 *
 * NOTE(review): this listing appears to be missing lines. tv, tz, stat,
 * add_wait, unmounted and jcr are used without a visible declaration,
 * now and start are declared but never visibly assigned, and no braces,
 * break/continue or return statements are shown. Of the W_* codes named
 * in the header comment, only W_ERROR and W_WAKE are visibly assigned.
 * Confirm against the full source before relying on these notes.
 */
40 int wait_for_sysop(DCR *dcr)
44 struct timespec timeout;
45 time_t last_heartbeat = 0;
46 time_t first_start = time(NULL);
50 DEVICE *dev = dcr->dev;
54 Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked());
57 * Since we want to mount a tape, make sure current one is
58 * not marked as using this drive.
62 unmounted = dev->is_device_unmounted();
65 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
66 * HB_TIME seconds and send a heartbeat to the FD and the Director
67 * to keep stateful firewalls from closing them down while waiting
/* Length of the first sleep: the remaining wait, shortened to the
 * heartbeat interval when one is configured and is smaller. */
70 add_wait = dev->rem_wait_sec;
71 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
72 add_wait = me->heartbeat_interval;
74 /* If the user did not unmount the tape and we are polling, ensure
75 * that we poll at the correct interval.
77 if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval) {
78 add_wait = dev->vol_poll_interval;
82 Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked());
83 dev->dev_prev_blocked = dev->blocked();
84 dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
/* The loop condition re-checks job_canceled(jcr) before every sleep. */
87 for ( ; !job_canceled(jcr); ) {
88 time_t now, start, total_waited;
/* Build the absolute wake-up time: current time plus add_wait seconds. */
90 gettimeofday(&tv, &tz);
91 timeout.tv_nsec = tv.tv_usec * 1000;
92 timeout.tv_sec = tv.tv_sec + add_wait;
94 Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
95 dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait);
98 /* Wait required time */
99 stat = dev->next_vol_timedwait(&timeout);
101 Dmsg2(dbglvl, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
102 dev->print_blocked());
/* total_waited is measured from function entry (first_start), whereas
 * rem_wait_sec is reduced by (now - start) only. NOTE(review): start is
 * presumably sampled just before the sleep and now just after it; those
 * assignments are not visible here. */
104 total_waited = now - first_start;
105 dev->rem_wait_sec -= (now - start);
107 /* Note, this always triggers the first time. We want that. */
108 if (me->heartbeat_interval) {
109 if (now - last_heartbeat >= me->heartbeat_interval) {
110 /* send heartbeats */
111 if (jcr->file_bsock) {
112 jcr->file_bsock->signal(BNET_HEARTBEAT);
113 Dmsg0(dbglvl, "Send heartbeat to FD.\n");
115 if (jcr->dir_bsock) {
116 jcr->dir_bsock->signal(BNET_HEARTBEAT);
118 last_heartbeat = now;
/* EINVAL from the timed wait is reported as a fatal job message and
 * mapped to W_ERROR. */
122 if (stat == EINVAL) {
124 Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"), be.bstrerror(stat));
125 stat = W_ERROR; /* error */
130 * Continue waiting if operator is labeling volumes
132 if (dev->blocked() == BST_WRITING_LABEL) {
136 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
137 Dmsg0(dbglvl, "Exceed wait time.\n");
143 * Check if user unmounted the device while we were waiting
145 unmounted = dev->is_device_unmounted();
/* A poll event is flagged only when the device is not unmounted, a poll
 * interval is configured, and the time waited since entry has reached it. */
147 if (!unmounted && dev->vol_poll_interval &&
148 (total_waited >= dev->vol_poll_interval)) {
149 Dmsg1(dbglvl, "poll return in wait blocked=%s\n", dev->print_blocked());
150 dev->poll = true; /* returning a poll event */
155 * Check if user mounted the device while we were waiting
157 if (dev->blocked() == BST_MOUNT) { /* mount request ? */
158 Dmsg0(dbglvl, "Mounted return.\n");
164 * If we did not timeout, then some event happened, so
165 * return to check if state changed.
167 if (stat != ETIMEDOUT) {
169 Dmsg2(dbglvl, "Wake return. stat=%d. ERR=%s\n", stat, be.bstrerror(stat));
170 stat = W_WAKE; /* someone woke us */
/* Recompute the next sleep length. Same caps as before the loop, except
 * that the polling cap is now the time left until vol_poll_interval,
 * counted from function entry (vol_poll_interval - total_waited). */
175 * At this point, we know we woke up because of a timeout,
176 * that was due to a heartbeat, because any other reason would
177 * have caused us to return, so update the wait counters and continue.
179 add_wait = dev->rem_wait_sec;
180 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
181 add_wait = me->heartbeat_interval;
183 /* If the user did not unmount the tape and we are polling, ensure
184 * that we poll at the correct interval.
186 if (!unmounted && dev->vol_poll_interval &&
187 add_wait > dev->vol_poll_interval - total_waited) {
188 add_wait = dev->vol_poll_interval - total_waited;
196 dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */
197 Dmsg1(dbglvl, "set %s\n", dev->print_blocked());
199 Dmsg1(dbglvl, "Exit blocked=%s\n", dev->print_blocked());
206 * Wait for any device to be released, then we return, so
207 * higher level code can rescan possible devices. Since there
208 * could be a job waiting for a drive to free up, we wait a maximum
209 * of 1 minute then retry just in case a broadcast was lost, and
210 * we return to rescan the devices.
212 * Returns: true if a device has changed state
213 * false if the total wait time has expired.
/*
 * wait_for_any_device: do one timed wait of at most max_wait_time (60)
 * seconds on the wait_device_release condition, bracketed by
 * P(device_release_mutex) / V(device_release_mutex).
 *
 * retries is taken by reference and incremented on every call; each time
 * it reaches a multiple of 5 an M_MOUNT job message reports that the job
 * is still waiting to reserve a device.
 *
 * NOTE(review): tv, tz, stat, ed1 and ok are used without a visible
 * declaration, and the code that sets and returns ok is not shown in
 * this listing. Confirm against the full source.
 */
215 bool wait_for_any_device(JCR *jcr, int &retries)
219 struct timespec timeout;
222 const int max_wait_time = 1 * 60; /* wait 1 minute */
225 Dmsg0(dbglvl, "Enter wait_for_any_device\n");
226 P(device_release_mutex);
/* The counter is bumped before the test, so the message appears on the
 * 5th, 10th, ... call. */
228 if (++retries % 5 == 0) {
229 /* Print message every 5 minutes */
230 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
231 edit_uint64(jcr->JobId, ed1), jcr->Job);
/* Absolute deadline for the wait: current time plus max_wait_time. */
234 gettimeofday(&tv, &tz);
235 timeout.tv_nsec = tv.tv_usec * 1000;
236 timeout.tv_sec = tv.tv_sec + max_wait_time;
238 Dmsg0(dbglvl, "Going to wait for a device.\n");
240 /* Wait required time */
241 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
242 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
244 V(device_release_mutex);
245 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
250 * Wait for a specific device to be released
251 * We wait a maximum of 1 minute then
252 * retry just in case a broadcast was lost.
254 * Returns: true if the device has changed state
255 * false if the total wait time has expired.
/*
 * wait_for_device: do one timed wait of at most max_wait_time (60)
 * seconds on the wait_device_release condition, bracketed by
 * P(device_release_mutex) / V(device_release_mutex). The condition and
 * mutex are the same ones wait_for_any_device uses; dcr->dev is only
 * used here for the trace and job messages.
 *
 * retries is taken by reference and incremented on every call; each time
 * it reaches a multiple of 5 an M_MOUNT job message names the device
 * being waited for.
 *
 * NOTE(review): jcr, tv, tz, stat, ed1 and ok are used without a visible
 * declaration, and the code that sets and returns ok is not shown in
 * this listing. Confirm against the full source.
 */
257 bool wait_for_device(DCR *dcr, int &retries)
261 struct timespec timeout;
263 DEVICE *dev = dcr->dev;
266 const int max_wait_time = 1 * 60; /* wait 1 minute */
/* This entry trace uses the literal debug level 40, not dbglvl. */
269 Dmsg3(40, "Enter wait_for_device. busy=%d dcrvol=%s devvol=%s\n",
270 dev->is_busy(), dcr->VolumeName, dev->getVolCatName());
272 P(device_release_mutex);
/* The counter is bumped before the test, so the message appears on the
 * 5th, 10th, ... call. */
274 if (++retries % 5 == 0) {
275 /* Print message every 5 minutes */
276 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting device %s.\n"),
277 edit_uint64(jcr->JobId, ed1), jcr->Job, dcr->dev->print_name());
/* Absolute deadline for the wait: current time plus max_wait_time. */
280 gettimeofday(&tv, &tz);
281 timeout.tv_nsec = tv.tv_usec * 1000;
282 timeout.tv_sec = tv.tv_sec + max_wait_time;
284 Dmsg0(dbglvl, "Going to wait for a device.\n");
286 /* Wait required time */
287 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
288 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
290 V(device_release_mutex);
291 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
298 * The jcr timers are used for waiting on any device *
299 * Returns: true if time doubled
300 * false if max time expired
302 static bool double_jcr_wait_time(JCR *jcr)
304 jcr->wait_sec *= 2; /* double wait time */
305 if (jcr->wait_sec > jcr->max_wait) { /* but not longer than maxtime */
306 jcr->wait_sec = jcr->max_wait;
309 jcr->rem_wait_sec = jcr->wait_sec;
310 if (jcr->num_wait >= jcr->max_num_wait) {