2 This patch applies to Bacula version 2.2.4 (and possibly to earlier 2.2.x versions)
3 and fixes a Storage daemon (SD) authentication problem with the File daemon (FD).
4 This fixes bug #953. The patch also adds some additional debug code and significantly
5 strengthens the SD session key by seeding it with per-job data.
7 Apply it to 2.2.4 with:
10 patch -p0 <2.2.4-sd-auth-fail.patch
16 Index: src/stored/job.c
17 ===================================================================
18 --- src/stored/job.c (revision 5602)
19 +++ src/stored/job.c (working copy)
25 BSOCK *dir = jcr->dir_bsock;
26 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
27 int JobType, level, spool_attributes, no_attributes, spool_data;
29 &write_part_after_job, &PreferMountedVols);
31 pm_strcpy(jcr->errmsg, dir->msg);
32 - bnet_fsend(dir, BAD_job, stat, jcr->errmsg);
33 + dir->fsend(BAD_job, stat, jcr->errmsg);
34 Dmsg1(100, ">dird: %s", dir->msg);
35 set_jcr_job_status(jcr, JS_ErrorTerminated);
39 * Pass back an authorization key for the File daemon
41 - make_session_key(auth_key, NULL, 1);
42 - bnet_fsend(dir, OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
43 - Dmsg1(100, ">dird: %s", dir->msg);
44 + bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
45 + make_session_key(auth_key, seed, 1);
46 + dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
47 + Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
48 jcr->sd_auth_key = bstrdup(auth_key);
49 memset(auth_key, 0, sizeof(auth_key));
50 generate_daemon_event(jcr, "JobStart");
52 timeout.tv_nsec = tv.tv_usec * 1000;
53 timeout.tv_sec = tv.tv_sec + me->client_wait;
55 - Dmsg2(100, "%s waiting %d sec for FD to contact SD\n",
56 - jcr->Job, (int)me->client_wait);
57 + Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
58 + jcr->Job, (int)me->client_wait, jcr->sd_auth_key);
61 * Wait for the File daemon to contact us to start the Job,
62 * when he does, we will be released, unless the 30 minutes
66 - for ( ; !job_canceled(jcr); ) {
67 + while ( !jcr->authenticated && !job_canceled(jcr) ) {
68 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
69 - if (errstat == 0 || errstat == ETIMEDOUT) {
70 + if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
78 - * After receiving a connection (in job.c) if it is
79 + * After receiving a connection (in dircmd.c) if it is
80 * from the File daemon, this routine is called.
82 void handle_filed_connection(BSOCK *fd, char *job_name)
85 bmicrosleep(0, 50000); /* wait 50 millisecs */
86 if (!(jcr=get_jcr_by_full_name(job_name))) {
87 - Jmsg1(NULL, M_FATAL, 0, _("Job name not found: %s\n"), job_name);
88 - Dmsg1(100, "Job name not found: %s\n", job_name);
89 + Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
90 + Dmsg1(3, "**** Job \"%s\" not found", job_name);
96 if (jcr->authenticated) {
97 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
98 - jcr->JobId, jcr->Job);
99 + (uint32_t)jcr->JobId, jcr->Job);
104 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
106 jcr->authenticated = true;
107 - Dmsg1(110, "OK Authentication Job %s\n", jcr->Job);
108 + Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
111 if (!jcr->authenticated) {
114 ok = dir_update_device(jcr, device->dev);
116 - ok = bnet_fsend(dir, OK_query);
117 + ok = dir->fsend(OK_query);
119 - bnet_fsend(dir, NO_query);
120 + dir->fsend(NO_query);
126 ok = dir_update_changer(jcr, changer);
128 - ok = bnet_fsend(dir, OK_query);
129 + ok = dir->fsend(OK_query);
131 - bnet_fsend(dir, NO_query);
132 + dir->fsend(NO_query);
136 @@ -299,12 +302,12 @@
137 /* If we get here, the device/autochanger was not found */
138 unbash_spaces(dir->msg);
139 pm_strcpy(jcr->errmsg, dir->msg);
140 - bnet_fsend(dir, NO_device, dev_name.c_str());
141 + dir->fsend(NO_device, dev_name.c_str());
142 Dmsg1(100, ">dird: %s\n", dir->msg);
144 unbash_spaces(dir->msg);
145 pm_strcpy(jcr->errmsg, dir->msg);
146 - bnet_fsend(dir, BAD_query, jcr->errmsg);
147 + dir->fsend(BAD_query, jcr->errmsg);
148 Dmsg1(100, ">dird: %s\n", dir->msg);
153 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
154 if (jcr->file_bsock) {
155 - bnet_close(jcr->file_bsock);
156 + jcr->file_bsock->close();
157 jcr->file_bsock = NULL;