2 This patch corrects a problem where the maximum concurrent storage
3 jobs counter gets out of sync during restore jobs, causing jobs to
4 "wait on max Storage jobs". This patch fixes bug #1009.
6 Apply this patch to 2.2.6 and probably any 2.2.x version with the following commands:
10 patch -p0 <2.2.6-maxconcurrentjobs.patch
11 ./configure <your-options>
17 Index: src/dird/jobq.c
18 ===================================================================
19 --- src/dird/jobq.c (revision 6019)
20 +++ src/dird/jobq.c (working copy)
23 - * Bacula job queue routines.
25 - * This code consists of three queues, the waiting_jobs
26 - * queue, where jobs are initially queued, the ready_jobs
27 - * queue, where jobs are placed when all the resources are
28 - * allocated and they can immediately be run, and the
29 - * running queue where jobs are placed when they are
32 - * Kern Sibbald, July MMIII
36 - * This code was adapted from the Bacula workq, which was
37 - * adapted from "Programming with POSIX Threads", by
42 Bacula® - The Network Backup Solution
44 Copyright (C) 2003-2007 Free Software Foundation Europe e.V.
46 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
47 Switzerland, email:ftf@fsfeurope.org.
50 + * Bacula job queue routines.
52 + * This code consists of three queues, the waiting_jobs
53 + * queue, where jobs are initially queued, the ready_jobs
54 + * queue, where jobs are placed when all the resources are
55 + * allocated and they can immediately be run, and the
56 + * running queue where jobs are placed when they are
59 + * Kern Sibbald, July MMIII
63 + * This code was adapted from the Bacula workq, which was
64 + * adapted from "Programming with POSIX Threads", by
74 jq->running_jobs->append(je);
75 +// set_jcr_in_tsd(jcr);
76 Dmsg1(2300, "Took jobid=%d from ready and appended to run\n", jcr->JobId);
78 /* Release job queue lock */
80 jcr->acquired_resource_locks = false;
82 Dmsg1(200, "Rstore=%s\n", jcr->rstore->name());
83 - if (jcr->rstore->NumConcurrentJobs == 0 &&
84 - jcr->rstore->NumConcurrentJobs < jcr->rstore->MaxConcurrentJobs) {
85 - /* Simple case, first job */
87 + * Let only one Restore/Verify job run at a time regardless
88 + * of MaxConcurrentJobs.
90 + if (jcr->rstore->NumConcurrentJobs == 0) {
91 jcr->rstore->NumConcurrentJobs = 1;
92 Dmsg0(200, "Set rncj=1\n");
93 - } else if (jcr->rstore->NumConcurrentJobs < jcr->rstore->MaxConcurrentJobs) {
94 - jcr->rstore->NumConcurrentJobs++;
95 - Dmsg1(200, "Inc rncj=%d\n", jcr->rstore->NumConcurrentJobs);
97 Dmsg1(200, "Fail rncj=%d\n", jcr->rstore->NumConcurrentJobs);
98 set_jcr_job_status(jcr, JS_WaitStoreRes);
101 Dmsg1(200, "Wstore=%s\n", jcr->wstore->name());
102 if (jcr->rstore == jcr->wstore) { /* deadlock */
103 - jcr->rstore->NumConcurrentJobs--; /* back out rstore */
104 + jcr->rstore->NumConcurrentJobs = 0; /* back out rstore */
105 Jmsg(jcr, M_FATAL, 0, _("Job canceled. Attempt to read and write same device.\n"
106 " Read storage \"%s\" (From %s) -- Write storage \"%s\" (From %s)\n"),
107 jcr->rstore->name(), jcr->rstore_source, jcr->wstore->name(), jcr->wstore_source);
109 jcr->wstore->NumConcurrentJobs++;
110 Dmsg1(200, "Inc wncj=%d\n", jcr->wstore->NumConcurrentJobs);
111 } else if (jcr->rstore) {
112 - jcr->rstore->NumConcurrentJobs--; /* back out rstore */
113 + jcr->rstore->NumConcurrentJobs = 0; /* back out rstore */
114 Dmsg1(200, "Fail wncj=%d\n", jcr->wstore->NumConcurrentJobs);
115 skip_this_jcr = true;
118 Dmsg1(200, "Dec wncj=%d\n", jcr->wstore->NumConcurrentJobs);
121 - jcr->rstore->NumConcurrentJobs--;
122 + jcr->rstore->NumConcurrentJobs = 0;
123 Dmsg1(200, "Dec rncj=%d\n", jcr->rstore->NumConcurrentJobs);
125 set_jcr_job_status(jcr, JS_WaitClientRes);
127 Dmsg1(200, "Dec wncj=%d\n", jcr->wstore->NumConcurrentJobs);
130 - jcr->rstore->NumConcurrentJobs--;
131 + jcr->rstore->NumConcurrentJobs = 0;
132 Dmsg1(200, "Dec rncj=%d\n", jcr->rstore->NumConcurrentJobs);
134 jcr->client->NumConcurrentJobs--;