- Document that ChangerDevice is used for Alert command.
For 1.37:
+- Fix 3993 error in SD. It forgets to look at autochanger
+ resource for device command, ...
- --without-openssl breaks at least on Solaris.
- Python:
- Make a callback when Rerun failed levels is called.
General:
+Changes to 1.37.19:
+18May05
+- Modify wait during use_device to happen only after all devices
+ have been examined rather than in the reserve_device code.
+
Changes to 1.37.18:
16May05
- Add more debug to SD for Autochangers + status output.
* We "reserve" the drive by setting the ST_READ bit. No one else
* should touch the drive until that is cleared.
* This allows the DIR to "reserve" the device before actually
- * starting the job. If the device is not available, the DIR
- * can wait (to be implemented 1/05).
+ * starting the job.
*/
bool reserve_device_for_read(DCR *dcr)
{
DEVICE *dev = dcr->dev;
JCR *jcr = dcr->jcr;
- bool first;
+ bool ok = false;
ASSERT(dcr);
- init_device_wait_timers(dcr);
-
dev->block(BST_DOING_ACQUIRE);
- Mmsg(jcr->errmsg, _("Device %s is BLOCKED due to user unmount.\n"),
- dev->print_name());
- for (first=true; device_is_unmounted(dev); first=false) {
- dev->unblock();
- if (!wait_for_device(dcr, jcr->errmsg, first)) {
- return false;
- }
- dev->block(BST_DOING_ACQUIRE);
+ if (device_is_unmounted(dev)) {
+ Mmsg(jcr->errmsg, _("Device %s is BLOCKED due to user unmount.\n"),
+ dev->print_name());
+ goto bail_out;
}
- Mmsg2(jcr->errmsg, _("Device %s is busy. Job %d canceled.\n"),
- dev->print_name(), jcr->JobId);
- for (first=true; dev->is_busy(); first=false) {
- dev->unblock();
- if (!wait_for_device(dcr, jcr->errmsg, first)) {
- return false;
- }
- dev->block(BST_DOING_ACQUIRE);
+ if (dev->is_busy()) {
+ Mmsg1(jcr->errmsg, _("Device %s is busy.\n"),
+ dev->print_name());
+ goto bail_out;
}
dev->clear_append();
dev->set_read();
+ ok = true;
+
+bail_out:
dev->unblock();
- return true;
+ return ok;
}
JCR *jcr = dcr->jcr;
DEVICE *dev = dcr->dev;
bool ok = false;
- bool first;
ASSERT(dcr);
- init_device_wait_timers(dcr);
-
dev->block(BST_DOING_ACQUIRE);
- Mmsg1(jcr->errmsg, _("Device %s is busy reading.\n"),
- dev->print_name());
- for (first=true; dev->can_read(); first=false) {
- dev->unblock();
- if (!wait_for_device(dcr, jcr->errmsg, first)) {
- return false;
- }
- dev->block(BST_DOING_ACQUIRE);
+ if (dev->can_read()) {
+ Mmsg1(jcr->errmsg, _("Device %s is busy reading.\n"), dev->print_name());
+ goto bail_out;
}
-
- Mmsg(jcr->errmsg, _("Device %s is BLOCKED due to user unmount.\n"),
- dev->print_name());
- for (first=true; device_is_unmounted(dev); first=false) {
- dev->unblock();
- if (!wait_for_device(dcr, jcr->errmsg, first)) {
- return false;
- }
- dev->block(BST_DOING_ACQUIRE);
+ if (device_is_unmounted(dev)) {
+ Mmsg(jcr->errmsg, _("Device %s is BLOCKED due to user unmount.\n"), dev->print_name());
+ goto bail_out;
}
Dmsg1(190, "reserve_append device is %s\n", dev->is_tape()?"tape":"disk");
- for ( ;; ) {
- switch (can_reserve_drive(dcr)) {
- case 0:
- Mmsg1(jcr->errmsg, _("Device %s is busy writing on another Volume.\n"), dev->print_name());
- dev->unblock();
- if (!wait_for_device(dcr, jcr->errmsg, first)) {
- return false;
- }
- dev->block(BST_DOING_ACQUIRE);
- continue;
- case -1:
- goto bail_out; /* error */
- default:
- break; /* OK, reserve drive */
- }
- break;
+ if (can_reserve_drive(dcr) != 1) {
+ Mmsg1(jcr->errmsg, _("Device %s is busy writing on another Volume.\n"), dev->print_name());
+ goto bail_out;
}
-
dev->reserved_device++;
dcr->reserved_device = true;
ok = true;
}
+void init_jcr_device_wait_timers(JCR *jcr)
+{
+ /* ******FIXME******* put these on config variables */
+ jcr->min_wait = 60 * 60;
+ jcr->max_wait = 24 * 60 * 60;
+ jcr->max_num_wait = 9; /* 5 waits =~ 1 day, then 1 day at a time */
+ jcr->wait_sec = jcr->min_wait;
+ jcr->rem_wait_sec = jcr->wait_sec;
+ jcr->num_wait = 0;
+}
+
+
/*
* The dev timers are used for waiting on a particular device
*
if (sscanf(dir->msg, "cancel Job=%127s", Job) == 1) {
if (!(jcr=get_jcr_by_full_name(Job))) {
- bnet_fsend(dir, _("3992 Job %s not found.\n"), Job);
+ bnet_fsend(dir, _("3902 Job %s not found.\n"), Job);
} else {
P(jcr->mutex);
oldStatus = jcr->JobStatus;
free_jcr(jcr);
}
} else {
- bnet_fsend(dir, _("3993 Error scanning cancel command.\n"));
+ bnet_fsend(dir, _("3903 Error scanning cancel command.\n"));
}
bnet_sig(dir, BNET_EOD);
return 1;
Copyright (C) 2000-2005 Kern Sibbald
This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of
- the License, or (at your option) any later version.
+ modify it under the terms of the GNU General Public License
+ version 2 as amended with additional clauses defined in the
+ file LICENSE in the main source directory.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public
- License along with this program; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- MA 02111-1307, USA.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ file LICENSE for additional details.
*/
static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
static char OK_device[] = "3000 OK use device device=%s\n";
static char NO_device[] = "3924 Device \"%s\" not in SD Device resources.\n";
-static char NOT_open[] = "3925 Device \"%s\" could not be opened or does not exist.\n";
+//static char NOT_open[] = "3925 Device \"%s\" could not be opened or does not exist.\n";
static char BAD_use[] = "3913 Bad use command: %s\n";
static char BAD_job[] = "3915 Bad Job command: %s\n";
//static char OK_query[] = "3001 OK query\n";
* Wiffle through them and find one that can do the backup.
*/
if (ok) {
- store = (DIRSTORE *)dirstore->first();
- foreach_alist(device_name, store->device) {
- if (search_res_for_device(jcr, store, device_name, append) == 1) {
- dcr = jcr->dcr;
- dcr->Copy = Copy;
- dcr->Stripe = Stripe;
- ok = true;
- goto done;
+ bool first = true;
+ init_jcr_device_wait_timers(jcr);
+ for ( ;; ) {
+ int need_wait = false;
+ foreach_alist(store, dirstore) {
+ foreach_alist(device_name, store->device) {
+ int stat;
+ stat = search_res_for_device(jcr, store, device_name, append);
+ if (stat == 1) { /* found available device */
+ dcr = jcr->dcr;
+ dcr->Copy = Copy;
+ dcr->Stripe = Stripe;
+ ok = true;
+ goto done;
+ } else if (stat == 0) { /* device busy */
+ need_wait = true;
+ }
+ }
+ }
+ /*
+ * If there is some device for which we can wait, then
+ * wait and try again until the wait time expires
+ */
+ if (!need_wait || !wait_for_device(jcr, jcr->errmsg, first)) {
+ break;
}
+ first = false;
}
if (verbose) {
unbash_spaces(dir->msg);
Jmsg(jcr, M_WARNING, 0, _("\n"
" Device \"%s\" requested by DIR could not be opened or does not exist.\n"),
device_name);
- bnet_fsend(dir, NOT_open, device_name);
- Dmsg1(100, ">dird: %s\n", dir->msg);
- return -1;
+ return 0;
}
Dmsg1(100, "Found device %s\n", device->hdr.name);
dcr = new_dcr(jcr, device->dev);
ok = reserve_device_for_read(dcr);
}
if (!ok) {
- bnet_fsend(dir, _("3927 Could not reserve device: %s\n"), device_name);
- Dmsg1(100, ">dird: %s\n", dir->msg);
free_dcr(jcr->dcr);
return 0;
}
bash_spaces(device_name);
ok = bnet_fsend(dir, OK_device, device_name);
Dmsg1(100, ">dird: %s\n", dir->msg);
- return ok;
+ return ok ? 1 : -1;
}
}
foreach_res(changer, R_AUTOCHANGER) {
bash_spaces(dev_name);
ok = bnet_fsend(dir, OK_device, dev_name.c_str()); /* Return real device name */
Dmsg1(100, ">dird: %s\n", dir->msg);
- return ok;
+ return ok ? 1 : -1;
}
}
}
- return 0;
+ return 0; /* nothing found */
}
bool offline_or_rewind_dev(DEVICE *dev);
bool reposition_dev(DEVICE *dev, uint32_t file, uint32_t block);
void init_device_wait_timers(DCR *dcr);
+void init_jcr_device_wait_timers(JCR *jcr);
bool double_dev_wait_time(DEVICE *dev);
/* Get info about device */
/* From wait.c */
int wait_for_sysop(DCR *dcr);
-bool wait_for_device(DCR *dcr, const char *msg, bool first);
+bool wait_for_device(JCR *jcr, const char *msg, bool first);
Copyright (C) 2000-2005 Kern Sibbald
This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of
- the License, or (at your option) any later version.
+ modify it under the terms of the GNU General Public License
+ version 2 as amended with additional clauses defined in the
+ file LICENSE in the main source directory.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public
- License along with this program; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- MA 02111-1307, USA.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ file LICENSE for additional details.
*/
P(dev->mutex);
unmounted = (dev->dev_blocked == BST_UNMOUNTED) ||
- (dev->dev_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
+ (dev->dev_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
dev->poll = false;
/*
- * Wait requested time (dev->rem_wait_sec). However, we also wake up every
- * HB_TIME seconds and send a heartbeat to the FD and the Director
- * to keep stateful firewalls from closing them down while waiting
- * for the operator.
+ * Wait requested time (dev->rem_wait_sec). However, we also wake up every
+ * HB_TIME seconds and send a heartbeat to the FD and the Director
+ * to keep stateful firewalls from closing them down while waiting
+ * for the operator.
*/
add_wait = dev->rem_wait_sec;
if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
timeout.tv_sec = tv.tv_sec + add_wait;
Dmsg3(400, "I'm going to sleep on device %s. HB=%d wait=%d\n", dev->print_name(),
- (int)me->heartbeat_interval, dev->wait_sec);
+ (int)me->heartbeat_interval, dev->wait_sec);
start = time(NULL);
/* Wait required time */
stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex, &timeout);
/* Note, this always triggers the first time. We want that. */
if (me->heartbeat_interval) {
- if (now - last_heartbeat >= me->heartbeat_interval) {
- /* send heartbeats */
- if (jcr->file_bsock) {
- bnet_sig(jcr->file_bsock, BNET_HEARTBEAT);
+ if (now - last_heartbeat >= me->heartbeat_interval) {
+ /* send heartbeats */
+ if (jcr->file_bsock) {
+ bnet_sig(jcr->file_bsock, BNET_HEARTBEAT);
Dmsg0(400, "Send heartbeat to FD.\n");
- }
- if (jcr->dir_bsock) {
- bnet_sig(jcr->dir_bsock, BNET_HEARTBEAT);
- }
- last_heartbeat = now;
- }
+ }
+ if (jcr->dir_bsock) {
+ bnet_sig(jcr->dir_bsock, BNET_HEARTBEAT);
+ }
+ last_heartbeat = now;
+ }
}
/*
* Check if user unmounted the device while we were waiting
*/
unmounted = (dev->dev_blocked == BST_UNMOUNTED) ||
- (dev->dev_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
+ (dev->dev_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
- if (stat != ETIMEDOUT) { /* we blocked the device */
- break; /* on error return */
+ if (stat != ETIMEDOUT) { /* we blocked the device */
+ break; /* on error return */
}
if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
Dmsg0(400, "Exceed wait time.\n");
- break;
+ break;
}
if (!unmounted && dev->vol_poll_interval &&
- (now - first_start >= dev->vol_poll_interval)) {
+ (now - first_start >= dev->vol_poll_interval)) {
Dmsg1(400, "In wait blocked=%s\n", edit_blocked_reason(dev));
- dev->poll = true; /* returning a poll event */
- break;
+ dev->poll = true; /* returning a poll event */
+ break;
}
/*
* Check if user mounted the device while we were waiting
*/
if (dev->dev_blocked == BST_MOUNT) { /* mount request ? */
- stat = 0;
- break;
+ stat = 0;
+ break;
}
add_wait = dev->wait_sec - (now - start);
if (add_wait < 0) {
- add_wait = 0;
+ add_wait = 0;
}
if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
- add_wait = me->heartbeat_interval;
+ add_wait = me->heartbeat_interval;
}
}
/*
- * Wait for Device to be released
- *
+ * Wait for any device to be released, then we return, so
+ * higher level code can rescan possible devices.
+ *
+ * Returns: true if a device has changed state
+ * false if the total wait time has expired.
*/
-bool wait_for_device(DCR *dcr, const char *msg, bool first)
+bool wait_for_device(JCR *jcr, const char *msg, bool first)
{
struct timeval tv;
struct timezone tz;
// time_t last_heartbeat = 0;
int stat = 0;
int add_wait;
- DEVICE *dev = dcr->dev;
- JCR *jcr = dcr->jcr;
bool ok = false;
Dmsg0(100, "Enter wait_for_device\n");
}
/*
- * Wait requested time (dev->rem_wait_sec). However, we also wake up every
- * HB_TIME seconds and send a heartbeat to the FD and the Director
- * to keep stateful firewalls from closing them down while waiting
- * for the operator.
+ * Wait requested time (jcr->rem_wait_sec). However, we also wake up every
+ * HB_TIME seconds and send a heartbeat to the FD and the Director
+ * to keep stateful firewalls from closing them down while waiting
+ * for the operator.
*/
add_wait = jcr->rem_wait_sec;
if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
timeout.tv_nsec = tv.tv_usec * 1000;
timeout.tv_sec = tv.tv_sec + add_wait;
- Dmsg4(100, "I'm going to sleep on device %s. HB=%d wait=%d remwait=%d\n", dev->print_name(),
- (int)me->heartbeat_interval, jcr->wait_sec, jcr->rem_wait_sec);
+ Dmsg3(100, "I'm going to wait for a device. HB=%d wait=%d remwait=%d\n",
+ (int)me->heartbeat_interval, jcr->wait_sec, jcr->rem_wait_sec);
start = time(NULL);
/* Wait required time */
stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
#ifdef needed
/* Note, this always triggers the first time. We want that. */
if (me->heartbeat_interval) {
- if (now - last_heartbeat >= me->heartbeat_interval) {
- /* send heartbeats */
- if (jcr->file_bsock) {
- bnet_sig(jcr->file_bsock, BNET_HEARTBEAT);
+ if (now - last_heartbeat >= me->heartbeat_interval) {
+ /* send heartbeats */
+ if (jcr->file_bsock) {
+ bnet_sig(jcr->file_bsock, BNET_HEARTBEAT);
Dmsg0(400, "Send heartbeat to FD.\n");
- }
- if (jcr->dir_bsock) {
- bnet_sig(jcr->dir_bsock, BNET_HEARTBEAT);
- }
- last_heartbeat = now;
- }
+ }
+ if (jcr->dir_bsock) {
+ bnet_sig(jcr->dir_bsock, BNET_HEARTBEAT);
+ }
+ last_heartbeat = now;
+ }
}
#endif
- if (stat != ETIMEDOUT) { /* if someone woke us up */
- ok = true;
- break; /* allow caller to examine device */
+ if (stat != ETIMEDOUT) { /* if someone woke us up */
+ ok = true;
+ break; /* allow caller to examine device */
}
if (jcr->rem_wait_sec <= 0) { /* on exceeding wait time return */
Dmsg0(400, "Exceed wait time.\n");
- if (!double_jcr_wait_time(jcr)) {
- break; /* give up */
- }
- Jmsg(jcr, M_MOUNT, 0, msg);
+ if (!double_jcr_wait_time(jcr)) {
+ break; /* give up */
+ }
+ Jmsg(jcr, M_MOUNT, 0, msg);
}
add_wait = jcr->wait_sec - (now - start);
if (add_wait < 0) {
- add_wait = 0;
+ add_wait = 0;
}
if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
- add_wait = me->heartbeat_interval;
+ add_wait = me->heartbeat_interval;
}
}
* The jcr timers are used for waiting on any device
*
* Returns: true if time doubled
- * false if max time expired
+ * false if max time expired
*/
static bool double_jcr_wait_time(JCR *jcr)
{
- jcr->wait_sec *= 2; /* double wait time */
+ jcr->wait_sec *= 2; /* double wait time */
if (jcr->wait_sec > jcr->max_wait) { /* but not longer than maxtime */
jcr->wait_sec = jcr->max_wait;
}
/* */
#undef VERSION
-#define VERSION "1.37.18"
-#define BDATE "16 May 2005"
-#define LSMDATE "16May05"
+#define VERSION "1.37.19"
+#define BDATE "18 May 2005"
+#define LSMDATE "18May05"
/* Debug flags */
#undef DEBUG