- Implement Diff,Inc Retention Periods
Priority:
+- Auto update of slot:
+ rufus-dir: ua_run.c:456-10 JobId=10 NewJobId=10 using pool Full priority=10
+ 02-Nov 12:58 rufus-dir JobId 10: Start Backup JobId 10, Job=kernsave.2007-11-02_12.58.03
+ 02-Nov 12:58 rufus-dir JobId 10: Using Device "DDS-4"
+ 02-Nov 12:58 rufus-sd JobId 10: Invalid slot=0 defined in catalog for Volume "Vol001" on "DDS-4" (/dev/nst0). Manual load my be required.
+ 02-Nov 12:58 rufus-sd JobId 10: 3301 Issuing autochanger "loaded? drive 0" command.
+ 02-Nov 12:58 rufus-sd JobId 10: 3302 Autochanger "loaded? drive 0", result is Slot 2.
+ 02-Nov 12:58 rufus-sd JobId 10: Wrote label to prelabeled Volume "Vol001" on device "DDS-4" (/dev/nst0)
+ 02-Nov 12:58 rufus-sd JobId 10: Alert: TapeAlert[7]: Media Life: The tape has reached the end of its useful life.
+ 02-Nov 12:58 rufus-dir JobId 10: Bacula rufus-dir 2.3.6 (26Oct07): 02-Nov-2007 12:58:51
- Encrypt sd_auth_key = s with director's key = d
k[i] = s[i] + (d[i] & 0xF)) & 0xFF + 'A' skip -
Decrypt key = k with director's key
--- /dev/null
+
+ This patch fixes bug #947 where a large number of emails were generated
+ because the heartbeat interval was small and the tape in the drive did
+ not correspond to the one wanted by Bacula.
+
+ Apply the patch to version 2.2.5 (and probably any 2.2.x version) with:
+
+ cd <bacula-source>
+ ./configure <your options>
+ patch -p0 <2.2.5-hb.patch
+ make
+ ...
+ make install
+
+
+Index: src/stored/wait.c
+===================================================================
+--- src/stored/wait.c (revision 5814)
++++ src/stored/wait.c (working copy)
+@@ -40,9 +40,8 @@
+ #include "bacula.h" /* pull in global headers */
+ #include "stored.h" /* pull in Storage Deamon headers */
+
+-//static bool double_jcr_wait_time(JCR *jcr);
++const int dbglvl = 400;
+
+-
+ /*
+ * Wait for SysOp to mount a tape on a specific device
+ *
+@@ -62,7 +61,7 @@
+ JCR *jcr = dcr->jcr;
+
+ dev->dlock();
+- Dmsg1(100, "Enter blocked=%s\n", dev->print_blocked());
++ Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked());
+ unmounted = is_device_unmounted(dev);
+
+ dev->poll = false;
+@@ -84,27 +83,28 @@
+ }
+
+ if (!unmounted) {
+- Dmsg1(400, "blocked=%s\n", dev->print_blocked());
++ Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked());
+ dev->dev_prev_blocked = dev->blocked();
+ dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
+ }
+
+ for ( ; !job_canceled(jcr); ) {
+- time_t now, start;
++ time_t now, start, total_waited;
+
+ gettimeofday(&tv, &tz);
+ timeout.tv_nsec = tv.tv_usec * 1000;
+ timeout.tv_sec = tv.tv_sec + add_wait;
+
+- Dmsg4(400, "I'm going to sleep on device %s. HB=%d wait=%d add_wait=%d\n",
+- dev->print_name(), (int)me->heartbeat_interval, dev->wait_sec, add_wait);
++ Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
++ dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait);
+ start = time(NULL);
+ /* Wait required time */
+ stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->m_mutex, &timeout);
+- Dmsg2(400, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
++ Dmsg2(dbglvl, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
+ dev->print_blocked());
+
+ now = time(NULL);
++ total_waited = now - first_start;
+ dev->rem_wait_sec -= (now - start);
+
+ /* Note, this always triggers the first time. We want that. */
+@@ -113,7 +113,7 @@
+ /* send heartbeats */
+ if (jcr->file_bsock) {
+ jcr->file_bsock->signal(BNET_HEARTBEAT);
+- Dmsg0(400, "Send heartbeat to FD.\n");
++ Dmsg0(dbglvl, "Send heartbeat to FD.\n");
+ }
+ if (jcr->dir_bsock) {
+ jcr->dir_bsock->signal(BNET_HEARTBEAT);
+@@ -131,7 +131,7 @@
+
+
+ if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
+- Dmsg0(400, "Exceed wait time.\n");
++ Dmsg0(dbglvl, "Exceed wait time.\n");
+ stat = W_TIMEOUT;
+ break;
+ }
+@@ -142,8 +142,8 @@
+ unmounted = is_device_unmounted(dev);
+
+ if (!unmounted && dev->vol_poll_interval &&
+- (now - first_start >= dev->vol_poll_interval)) {
+- Dmsg1(400, "In wait blocked=%s\n", dev->print_blocked());
++ (total_waited >= dev->vol_poll_interval)) {
++ Dmsg1(dbglvl, "poll return in wait blocked=%s\n", dev->print_blocked());
+ dev->poll = true; /* returning a poll event */
+ stat = W_POLL;
+ break;
+@@ -152,6 +152,7 @@
+ * Check if user mounted the device while we were waiting
+ */
+ if (dev->blocked() == BST_MOUNT) { /* mount request ? */
++ Dmsg0(dbglvl, "Mounted return.\n");
+ stat = W_MOUNT;
+ break;
+ }
+@@ -160,30 +161,39 @@
+ * If we did not timeout, then some event happened, so
+ * return to check if state changed.
+ */
+- if (stat != 0) {
++ if (stat != ETIMEDOUT) {
++ berrno be;
++ Dmsg2(dbglvl, "Wake return. stat=%d. ERR=%s\n", stat, be.bstrerror(stat));
+ stat = W_WAKE; /* someone woke us */
+ break;
+ }
+
+ /*
+ * At this point, we know we woke up because of a timeout,
+- * that was due to a heartbeat, so we just update
+- * the wait counters and continue.
++ * that was due to a heartbeat, because any other reason would
++ * have caused us to return, so update the wait counters and continue.
+ */
+- add_wait = dev->wait_sec - (now - start);
++ add_wait = dev->rem_wait_sec;
++ if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
++ add_wait = me->heartbeat_interval;
++ }
++ /* If the user did not unmount the tape and we are polling, ensure
++ * that we poll at the correct interval.
++ */
++ if (!unmounted && dev->vol_poll_interval &&
++ add_wait > dev->vol_poll_interval - total_waited) {
++ add_wait = dev->vol_poll_interval - total_waited;
++ }
+ if (add_wait < 0) {
+ add_wait = 0;
+ }
+- if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
+- add_wait = me->heartbeat_interval;
+- }
+ }
+
+ if (!unmounted) {
+ dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */
+- Dmsg1(400, "set %s\n", dev->print_blocked());
++ Dmsg1(dbglvl, "set %s\n", dev->print_blocked());
+ }
+- Dmsg1(400, "Exit blocked=%s\n", dev->print_blocked());
++ Dmsg1(dbglvl, "Exit blocked=%s\n", dev->print_blocked());
+ dev->dunlock();
+ return stat;
+ }
+@@ -209,7 +219,7 @@
+ const int max_wait_time = 1 * 60; /* wait 1 minute */
+ char ed1[50];
+
+- Dmsg0(100, "Enter wait_for_device\n");
++ Dmsg0(dbglvl, "Enter wait_for_device\n");
+ P(device_release_mutex);
+
+ if (++retries % 5 == 0) {
+@@ -222,14 +232,14 @@
+ timeout.tv_nsec = tv.tv_usec * 1000;
+ timeout.tv_sec = tv.tv_sec + max_wait_time;
+
+- Dmsg1(100, "JobId=%u going to wait for a device.\n", (uint32_t)jcr->JobId);
++ Dmsg0(dbglvl, "Going to wait for a device.\n");
+
+ /* Wait required time */
+ stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
+- Dmsg2(100, "JobId=%u wokeup from sleep on device stat=%d\n", (uint32_t)jcr->JobId, stat);
++ Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
+
+ V(device_release_mutex);
+- Dmsg2(100, "JobId=%u return from wait_device ok=%d\n", (uint32_t)jcr->JobId, ok);
++ Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
+ return ok;
+ }
+
#include "bacula.h" /* pull in global headers */
#include "stored.h" /* pull in Storage Deamon headers */
-//static bool double_jcr_wait_time(JCR *jcr);
-
+const int dbglvl = 400;
/*
* Wait for SysOp to mount a tape on a specific device
JCR *jcr = dcr->jcr;
dev->dlock();
- Dmsg1(100, "Enter blocked=%s\n", dev->print_blocked());
+ Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked());
unmounted = is_device_unmounted(dev);
dev->poll = false;
}
if (!unmounted) {
- Dmsg1(400, "blocked=%s\n", dev->print_blocked());
+ Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked());
dev->dev_prev_blocked = dev->blocked();
dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
}
for ( ; !job_canceled(jcr); ) {
- time_t now, start;
+ time_t now, start, total_waited;
gettimeofday(&tv, &tz);
timeout.tv_nsec = tv.tv_usec * 1000;
timeout.tv_sec = tv.tv_sec + add_wait;
- Dmsg4(400, "I'm going to sleep on device %s. HB=%d wait=%d add_wait=%d\n",
- dev->print_name(), (int)me->heartbeat_interval, dev->wait_sec, add_wait);
+ Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
+ dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait);
start = time(NULL);
/* Wait required time */
stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->m_mutex, &timeout);
- Dmsg2(400, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
+ Dmsg2(dbglvl, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
dev->print_blocked());
now = time(NULL);
+ total_waited = now - first_start;
dev->rem_wait_sec -= (now - start);
/* Note, this always triggers the first time. We want that. */
/* send heartbeats */
if (jcr->file_bsock) {
jcr->file_bsock->signal(BNET_HEARTBEAT);
- Dmsg0(400, "Send heartbeat to FD.\n");
+ Dmsg0(dbglvl, "Send heartbeat to FD.\n");
}
if (jcr->dir_bsock) {
jcr->dir_bsock->signal(BNET_HEARTBEAT);
if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
- Dmsg0(400, "Exceed wait time.\n");
+ Dmsg0(dbglvl, "Exceed wait time.\n");
stat = W_TIMEOUT;
break;
}
unmounted = is_device_unmounted(dev);
if (!unmounted && dev->vol_poll_interval &&
- (now - first_start >= dev->vol_poll_interval)) {
- Dmsg1(400, "In wait blocked=%s\n", dev->print_blocked());
+ (total_waited >= dev->vol_poll_interval)) {
+ Dmsg1(dbglvl, "poll return in wait blocked=%s\n", dev->print_blocked());
dev->poll = true; /* returning a poll event */
stat = W_POLL;
break;
* Check if user mounted the device while we were waiting
*/
if (dev->blocked() == BST_MOUNT) { /* mount request ? */
+ Dmsg0(dbglvl, "Mounted return.\n");
stat = W_MOUNT;
break;
}
* If we did not timeout, then some event happened, so
* return to check if state changed.
*/
- if (stat != 0) {
+ if (stat != ETIMEDOUT) {
+ berrno be;
+ Dmsg2(dbglvl, "Wake return. stat=%d. ERR=%s\n", stat, be.bstrerror(stat));
stat = W_WAKE; /* someone woke us */
break;
}
/*
* At this point, we know we woke up because of a timeout,
- * that was due to a heartbeat, so we just update
- * the wait counters and continue.
+ * that was due to a heartbeat, because any other reason would
+ * have caused us to return, so update the wait counters and continue.
*/
- add_wait = dev->wait_sec - (now - start);
- if (add_wait < 0) {
- add_wait = 0;
- }
+ add_wait = dev->rem_wait_sec;
if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
add_wait = me->heartbeat_interval;
}
+ /* If the user did not unmount the tape and we are polling, ensure
+ * that we poll at the correct interval.
+ */
+ if (!unmounted && dev->vol_poll_interval &&
+ add_wait > dev->vol_poll_interval - total_waited) {
+ add_wait = dev->vol_poll_interval - total_waited;
+ }
+ if (add_wait < 0) {
+ add_wait = 0;
+ }
}
if (!unmounted) {
dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */
- Dmsg1(400, "set %s\n", dev->print_blocked());
+ Dmsg1(dbglvl, "set %s\n", dev->print_blocked());
}
- Dmsg1(400, "Exit blocked=%s\n", dev->print_blocked());
+ Dmsg1(dbglvl, "Exit blocked=%s\n", dev->print_blocked());
dev->dunlock();
return stat;
}
const int max_wait_time = 1 * 60; /* wait 1 minute */
char ed1[50];
- Dmsg0(100, "Enter wait_for_device\n");
+ Dmsg0(dbglvl, "Enter wait_for_device\n");
P(device_release_mutex);
if (++retries % 5 == 0) {
timeout.tv_nsec = tv.tv_usec * 1000;
timeout.tv_sec = tv.tv_sec + max_wait_time;
- Dmsg1(100, "JobId=%u going to wait for a device.\n", (uint32_t)jcr->JobId);
+ Dmsg0(dbglvl, "Going to wait for a device.\n");
/* Wait required time */
stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
- Dmsg2(100, "JobId=%u wokeup from sleep on device stat=%d\n", (uint32_t)jcr->JobId, stat);
+ Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
V(device_release_mutex);
- Dmsg2(100, "JobId=%u return from wait_device ok=%d\n", (uint32_t)jcr->JobId, ok);
+ Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
return ok;
}
Technical notes on version 2.3
General:
+02Nov07
+kes Fix bug #942 where lots of emails where generated when the heartbeat
+ interval was low and Bacula wanted a different tape from the one in
+ the drive.
25Oct07
ebl Add new SD despooling attributes and Dir inserting attributes
job status codes in the catalog (Status table).