--- /dev/null
+
+ This patch should fix the race condition that leads to a Director
+ crash at job end time when the job list is updated. This was reported
+ in bug #1162.
+
+ Apply this patch to Bacula version 2.4.2 (and earlier) with:
+
+ cd <bacula-source>
+ patch -p0 <2.4.2-jobend-crash.patch
+ ./configure <your-options>
+ make
+ ...
+ make install
+
+
+Index: src/lib/jcr.c
+===================================================================
+--- src/lib/jcr.c (revision 7566)
++++ src/lib/jcr.c (working copy)
+@@ -110,6 +110,7 @@
+ void term_last_jobs_list()
+ {
+ if (last_jobs) {
++ lock_last_jobs_list();
+ while (!last_jobs->empty()) {
+ void *je = last_jobs->first();
+ last_jobs->remove(je);
+@@ -117,6 +118,7 @@
+ }
+ delete last_jobs;
+ last_jobs = NULL;
++ unlock_last_jobs_list();
+ }
+ if (jcrs) {
+ delete jcrs;
+@@ -128,6 +130,7 @@
+ {
+ struct s_last_job *je, job;
+ uint32_t num;
++ bool ok = true;
+
+ Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
+ if (addr == 0 || lseek(fd, (off_t)addr, SEEK_SET) < 0) {
+@@ -140,11 +143,13 @@
+ if (num > 4 * max_last_jobs) { /* sanity check */
+ return false;
+ }
++ lock_last)jobs_list();
+ for ( ; num; num--) {
+ if (read(fd, &job, sizeof(job)) != sizeof(job)) {
+ berrno be;
+ Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
+- return false;
++ ok = false;
++ break;
+ }
+ if (job.JobId > 0) {
+ je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
+@@ -160,7 +165,8 @@
+ }
+ }
+ }
+- return true;
++ unlock_last_jobs_list();
++ return ok;
+ }
+
+ uint64_t write_last_jobs_list(int fd, uint64_t addr)
+@@ -173,20 +179,22 @@
+ return 0;
+ }
+ if (last_jobs) {
++ lock_last)jobs_list();
+ /* First record is number of entires */
+ num = last_jobs->size();
+ if (write(fd, &num, sizeof(num)) != sizeof(num)) {
+ berrno be;
+ Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
+- return 0;
++ goto bail_out;
+ }
+ foreach_dlist(je, last_jobs) {
+ if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
+ berrno be;
+ Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
+- return 0;
++ got bail_out;
+ }
+ }
++ unlock_last_jobs_list();
+ }
+ /* Return current address */
+ ssize_t stat = lseek(fd, 0, SEEK_CUR);
+@@ -195,6 +203,9 @@
+ }
+ return stat;
+
++bail_out:
++ unlock_last_jobs_list();
++ return 0;
+ }
+
+ void lock_last_jobs_list()
+@@ -331,6 +342,7 @@
+ last_job.end_time = time(NULL);
+ /* Keep list of last jobs, but not Console where JobId==0 */
+ if (last_job.JobId > 0) {
++ lock_last_jobs_list();
+ je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
+ memcpy((char *)je, (char *)&last_job, sizeof(last_job));
+ if (!last_jobs) {
+@@ -342,6 +354,7 @@
+ last_jobs->remove(je);
+ free(je);
+ }
++ unlock_last_jobs_list();
+ }
+ break;
+ default:
void term_last_jobs_list()
{
if (last_jobs) {
+ lock_last_jobs_list();
while (!last_jobs->empty()) {
void *je = last_jobs->first();
last_jobs->remove(je);
}
delete last_jobs;
last_jobs = NULL;
+ unlock_last_jobs_list();
}
if (jcrs) {
delete jcrs;
{
struct s_last_job *je, job;
uint32_t num;
+ bool ok = true;
Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
if (addr == 0 || lseek(fd, (off_t)addr, SEEK_SET) < 0) {
if (num > 4 * max_last_jobs) { /* sanity check */
return false;
}
+ lock_last)jobs_list();
for ( ; num; num--) {
if (read(fd, &job, sizeof(job)) != sizeof(job)) {
berrno be;
Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
- return false;
+ ok = false;
+ break;
}
if (job.JobId > 0) {
je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
}
}
}
- return true;
+ unlock_last_jobs_list();
+ return ok;
}
uint64_t write_last_jobs_list(int fd, uint64_t addr)
return 0;
}
if (last_jobs) {
+ lock_last)jobs_list();
/* First record is number of entires */
num = last_jobs->size();
if (write(fd, &num, sizeof(num)) != sizeof(num)) {
berrno be;
Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
- return 0;
+ goto bail_out;
}
foreach_dlist(je, last_jobs) {
if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
berrno be;
Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
- return 0;
+ got bail_out;
}
}
+ unlock_last_jobs_list();
}
/* Return current address */
ssize_t stat = lseek(fd, 0, SEEK_CUR);
}
return stat;
+bail_out:
+ unlock_last_jobs_list();
+ return 0;
}
void lock_last_jobs_list()
last_job.end_time = time(NULL);
/* Keep list of last jobs, but not Console where JobId==0 */
if (last_job.JobId > 0) {
+ lock_last_jobs_list();
je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
memcpy((char *)je, (char *)&last_job, sizeof(last_job));
if (!last_jobs) {
last_jobs->remove(je);
free(je);
}
+ unlock_last_jobs_list();
}
break;
default: