2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2014 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from many
7 others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 Bacula® is a registered trademark of Kern Sibbald.
17 * Subroutines to handle waiting for operator intervention
18 * or waiting for a Device to be released
20 * Code for wait_for_sysop() pulled from askdir.c
22 * Kern Sibbald, March 2005
27 #include "bacula.h" /* pull in global headers */
28 #include "stored.h" /* pull in Storage Daemon headers */
// Value passed as the first argument to most Dmsg0/1/2/4 trace calls in this file
// (presumably the debug level at which those messages are emitted).
30 const int dbglvl = 400;
33 * Wait for SysOp to mount a tape on a specific device
35 * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
// wait_for_sysop: wait on dcr->dev in timed slices until something ends the wait.
//
// Parameter:
//   dcr - device control record; only dcr->dev is read in the lines visible here.
//
// Result: the value of stat. The visible code sets it to W_ERROR when the timed wait
// reports EINVAL and to W_WAKE when the wait ended for any reason other than ETIMEDOUT.
// The comment preceding this function also lists W_TIMEOUT, W_POLL and W_MOUNT.
//
// NOTE(review): this extract appears to be missing lines. The declarations of tv, tz,
// stat, add_wait, unmounted and jcr, the assignments to start and now, the statements
// that leave or continue the loop, and the final return are not visible here.
// Confirm against the complete file before relying on these notes.
37 int wait_for_sysop(DCR *dcr)
41 struct timespec timeout;
// Zero, so the first heartbeat check inside the loop always fires.
42 time_t last_heartbeat = 0;
// Reference point for total_waited, which drives the poll-interval checks below.
43 time_t first_start = time(NULL);
47 DEVICE *dev = dcr->dev;
51 Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked());
54 * Since we want to mount a tape, make sure current one is
55 * not marked as using this drive.
59 unmounted = dev->is_device_unmounted();
62 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
63 * HB_TIME seconds and send a heartbeat to the FD and the Director
64 * to keep stateful firewalls from closing them down while waiting
// First wait slice: the remaining wait time, capped at the heartbeat interval
// when a nonzero interval is configured.
67 add_wait = dev->rem_wait_sec;
68 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
69 add_wait = me->heartbeat_interval;
// The slice is capped again at the volume poll interval when the device is not
// unmounted and a nonzero poll interval is set.
71 /* If the user did not unmount the tape and we are polling, ensure
72 * that we poll at the correct interval.
74 if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval) {
75 add_wait = dev->vol_poll_interval;
// Save the current blocked state in dev_prev_blocked (restored near the end of the
// function) and mark the device as waiting for the operator.
79 Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked());
80 dev->dev_prev_blocked = dev->blocked();
81 dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
// Main loop: repeats for as long as job_canceled(jcr) is false.
84 for ( ; !job_canceled(jcr); ) {
85 time_t now, start, total_waited;
// Build the absolute deadline for this slice: current time plus add_wait seconds.
// tv_usec is microseconds, so multiply by 1000 to get the nanoseconds timespec wants.
87 gettimeofday(&tv, &tz);
88 timeout.tv_nsec = tv.tv_usec * 1000;
89 timeout.tv_sec = tv.tv_sec + add_wait;
91 Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
92 dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait);
95 /* Wait required time */
96 stat = dev->next_vol_timedwait(&timeout);
98 Dmsg2(dbglvl, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
99 dev->print_blocked());
// Account for the time spent in this slice.
// NOTE(review): start and now are read here but their assignments are not visible
// in this extract - presumably time(NULL) taken before and after the wait; confirm.
101 total_waited = now - first_start;
102 dev->rem_wait_sec -= (now - start);
// Heartbeats are sent only when a nonzero heartbeat interval is configured and at
// least that many seconds have passed since the previous one. Each socket is
// signalled only if its pointer is non-null.
104 /* Note, this always triggers the first time. We want that. */
105 if (me->heartbeat_interval) {
106 if (now - last_heartbeat >= me->heartbeat_interval) {
107 /* send heartbeats */
108 if (jcr->file_bsock) {
109 jcr->file_bsock->signal(BNET_HEARTBEAT);
110 Dmsg0(dbglvl, "Send heartbeat to FD.\n");
112 if (jcr->dir_bsock) {
113 jcr->dir_bsock->signal(BNET_HEARTBEAT);
115 last_heartbeat = now;
// EINVAL from the timed wait is reported as a fatal job message and mapped to W_ERROR.
// NOTE(review): the declaration of be (used for bstrerror) is not visible here.
119 if (stat == EINVAL) {
121 Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"), be.bstrerror(stat));
122 stat = W_ERROR; /* error */
127 * Continue waiting if operator is labeling volumes
129 if (dev->blocked() == BST_WRITING_LABEL) {
133 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
134 Dmsg0(dbglvl, "Exceed wait time.\n");
140 * Check if user unmounted the device while we were waiting
142 unmounted = dev->is_device_unmounted();
// Poll event: the device is not unmounted, polling is enabled, and the total time
// waited since entry has reached the poll interval.
144 if (!unmounted && dev->vol_poll_interval &&
145 (total_waited >= dev->vol_poll_interval)) {
146 Dmsg1(dbglvl, "poll return in wait blocked=%s\n", dev->print_blocked());
147 dev->poll = true; /* returning a poll event */
152 * Check if user mounted the device while we were waiting
154 if (dev->blocked() == BST_MOUNT) { /* mount request ? */
155 Dmsg0(dbglvl, "Mounted return.\n");
161 * If we did not timeout, then some event happened, so
162 * return to check if state changed.
164 if (stat != ETIMEDOUT) {
166 Dmsg2(dbglvl, "Wake return. stat=%d. ERR=%s\n", stat, be.bstrerror(stat));
167 stat = W_WAKE; /* someone woke us */
172 * At this point, we know we woke up because of a timeout,
173 * that was due to a heartbeat, because any other reason would
174 * have caused us to return, so update the wait counters and continue.
// Recompute the next slice with the same caps as before the loop, except that the
// poll cap is now the time left until the poll interval (interval minus total_waited).
176 add_wait = dev->rem_wait_sec;
177 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
178 add_wait = me->heartbeat_interval;
180 /* If the user did not unmount the tape and we are polling, ensure
181 * that we poll at the correct interval.
183 if (!unmounted && dev->vol_poll_interval &&
184 add_wait > dev->vol_poll_interval - total_waited) {
185 add_wait = dev->vol_poll_interval - total_waited;
// Put back the blocked state that was saved in dev_prev_blocked before the loop.
// NOTE(review): any condition guarding this restore is not visible in this extract.
193 dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */
194 Dmsg1(dbglvl, "set %s\n", dev->print_blocked());
196 Dmsg1(dbglvl, "Exit blocked=%s\n", dev->print_blocked());
203 * Wait for any device to be released, then we return, so
204 * higher level code can rescan possible devices. Since there
205 * could be a job waiting for a drive to free up, we wait a maximum
206 * of 1 minute then retry just in case a broadcast was lost, and
207 * we return to rescan the devices.
209 * Returns: true if a device has changed state
210 * false if the total wait time has expired.
// wait_for_any_device: wait up to max_wait_time seconds on the wait_device_release
// condition variable.
//
// Parameters:
//   jcr     - job whose JobId and Job name appear in the periodic waiting message.
//   retries - caller-owned counter, incremented on every call; a message is queued
//             each time it reaches a multiple of 5.
//
// NOTE(review): the declarations of tv, tz, stat, ok and ed1, the code that derives
// ok from stat, and the return statement are not visible in this extract. The
// comment preceding this function documents the true/false result.
212 bool wait_for_any_device(JCR *jcr, int &retries)
216 struct timespec timeout;
219 const int max_wait_time = 1 * 60; /* wait 1 minute */
222 Dmsg0(dbglvl, "Enter wait_for_any_device\n");
// P() and V() bracket the wait on device_release_mutex. pthread_cond_timedwait needs
// that mutex held by the caller, so P() presumably locks it and V() unlocks it.
223 P(device_release_mutex);
// Each call waits at most one minute, so every fifth call is at most five minutes apart.
225 if (++retries % 5 == 0) {
226 /* Print message every 5 minutes */
227 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
228 edit_uint64(jcr->JobId, ed1), jcr->Job);
// Absolute deadline: current time plus max_wait_time seconds (usec converted to nsec).
231 gettimeofday(&tv, &tz);
232 timeout.tv_nsec = tv.tv_usec * 1000;
233 timeout.tv_sec = tv.tv_sec + max_wait_time;
235 Dmsg0(dbglvl, "Going to wait for a device.\n");
237 /* Wait required time */
238 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
239 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
241 V(device_release_mutex);
242 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
247 * Wait for a specific device to be released
248 * We wait a maximum of 1 minute then
249 * retry just in case a broadcast was lost.
251 * Returns: true if the device has changed state
252 * false if the total wait time has expired.
// wait_for_device: wait up to max_wait_time seconds on the wait_device_release
// condition variable on behalf of the device attached to dcr.
//
// Parameters:
//   dcr     - device control record; dcr->dev and dcr->VolumeName are read here.
//   retries - caller-owned counter, incremented on every call; a message is queued
//             each time it reaches a multiple of 5.
//
// The wait uses the same condition variable and mutex as wait_for_any_device.
//
// NOTE(review): the declarations of tv, tz, stat, ok, ed1 and jcr, the code that
// derives ok from stat, and the return statement are not visible in this extract.
// The comment preceding this function documents the true/false result.
254 bool wait_for_device(DCR *dcr, int &retries)
258 struct timespec timeout;
260 DEVICE *dev = dcr->dev;
263 const int max_wait_time = 1 * 60; /* wait 1 minute */
// This trace call passes the literal 40 where the other calls in this file pass dbglvl.
266 Dmsg3(40, "Enter wait_for_device. busy=%d dcrvol=%s devvol=%s\n",
267 dev->is_busy(), dcr->VolumeName, dev->getVolCatName());
// P() and V() bracket the wait on device_release_mutex. pthread_cond_timedwait needs
// that mutex held by the caller, so P() presumably locks it and V() unlocks it.
269 P(device_release_mutex);
// Each call waits at most one minute, so every fifth call is at most five minutes apart.
271 if (++retries % 5 == 0) {
272 /* Print message every 5 minutes */
273 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting device %s.\n"),
274 edit_uint64(jcr->JobId, ed1), jcr->Job, dcr->dev->print_name());
// Absolute deadline: current time plus max_wait_time seconds (usec converted to nsec).
277 gettimeofday(&tv, &tz);
278 timeout.tv_nsec = tv.tv_usec * 1000;
279 timeout.tv_sec = tv.tv_sec + max_wait_time;
281 Dmsg0(dbglvl, "Going to wait for a device.\n");
283 /* Wait required time */
284 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
285 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
287 V(device_release_mutex);
288 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
295 * The jcr timers are used for waiting on any device *
296 * Returns: true if time doubled
297 * false if max time expired
299 static bool double_jcr_wait_time(JCR *jcr)
301 jcr->wait_sec *= 2; /* double wait time */
302 if (jcr->wait_sec > jcr->max_wait) { /* but not longer than maxtime */
303 jcr->wait_sec = jcr->max_wait;
306 jcr->rem_wait_sec = jcr->wait_sec;
307 if (jcr->num_wait >= jcr->max_num_wait) {