-/*
- *
- * Bacula Director -- msgchan.c -- handles the message channel
- * to the Storage daemon and the File daemon.
- *
- * Kern Sibbald, August MM
- *
- * This routine runs as a thread and must be thread reentrant.
- *
- * Basic tasks done here:
- * Open a message channel with the Storage daemon
- * to authenticate ourself and to pass the JobId.
- * Create a thread to interact with the Storage daemon
- * who returns a job status and requests Catalog services, etc.
- *
- * Version $Id$
- */
/*
Bacula® - The Network Backup Solution
- Copyright (C) 2000-2006 Free Software Foundation Europe e.V.
+ Copyright (C) 2000-2008 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
This program is Free Software; you can redistribute it and/or
modify it under the terms of version two of the GNU General Public
- License as published by the Free Software Foundation plus additions
- that are listed in the file LICENSE.
+ License as published by the Free Software Foundation and included
+ in the file LICENSE.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
- Bacula® is a registered trademark of John Walker.
+ Bacula® is a registered trademark of Kern Sibbald.
The licensor of Bacula is the Free Software Foundation Europe
(FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
Switzerland, email:ftf@fsfeurope.org.
*/
+/*
+ *
+ * Bacula Director -- msgchan.c -- handles the message channel
+ * to the Storage daemon and the File daemon.
+ *
+ * Kern Sibbald, August MM
+ *
+ * This routine runs as a thread and must be thread reentrant.
+ *
+ * Basic tasks done here:
+ * Open a message channel with the Storage daemon
+ * to authenticate ourselves and to pass the JobId.
+ * Create a thread to interact with the Storage daemon,
+ * which returns a job status and requests Catalog services, etc.
+ *
+ * Version $Id$
+ */
#include "bacula.h"
#include "dird.h"
/* Commands sent to Storage daemon */
static char jobcmd[] = "JobId=%s job=%s job_name=%s client_name=%s "
"type=%d level=%d FileSet=%s NoAttr=%d SpoolAttr=%d FileSetMD5=%s "
- "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
+ "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s\n";
static char use_storage[] = "use storage=%s media_type=%s pool_name=%s "
"pool_type=%s append=%d copy=%d stripe=%d\n";
static char use_device[] = "use device=%s\n";
/* Storage Daemon requests */
static char Job_start[] = "3010 Job %127s start\n";
static char Job_end[] =
- "3099 Job %127s end JobStatus=%d JobFiles=%d JobBytes=%" lld "\n";
+ "3099 Job %127s end JobStatus=%d JobFiles=%d JobBytes=%lld JobErrors=%u\n";
/* Forward referenced functions */
extern "C" void *msg_thread(void *arg);
bool connect_to_storage_daemon(JCR *jcr, int retry_interval,
int max_retry_time, int verbose)
{
- BSOCK *sd;
+ BSOCK *sd = new_bsock();
STORE *store;
+ utime_t heart_beat;
if (jcr->store_bsock) {
return true; /* already connected */
store = jcr->rstore;
}
+ if (store->heartbeat_interval) {
+ heart_beat = store->heartbeat_interval;
+ } else {
+ heart_beat = director->heartbeat_interval;
+ }
+
/*
* Open message channel with the Storage daemon
*/
Dmsg2(100, "bnet_connect to Storage daemon %s:%d\n", store->address,
store->SDport);
- sd = bnet_connect(jcr, retry_interval, max_retry_time,
- _("Storage daemon"), store->address,
- NULL, store->SDport, verbose);
+ sd->set_source_address(director->DIRsrc_addr);
+ if (!sd->connect(jcr, retry_interval, max_retry_time, heart_beat, _("Storage daemon"),
+ store->address, NULL, store->SDport, verbose)) {
+ sd->destroy();
+ sd = NULL;
+ }
+
if (sd == NULL) {
return false;
}
jcr->store_bsock = sd;
if (!authenticate_storage_daemon(jcr, store)) {
- bnet_close(sd);
+ sd->close();
jcr->store_bsock = NULL;
return false;
}
* Here we ask the SD to send us the info for a
* particular device resource.
*/
-#ifdef needed
+#ifdef xxx
bool update_device_res(JCR *jcr, DEVICE *dev)
{
POOL_MEM device_name;
return false;
}
sd = jcr->store_bsock;
- pm_strcpy(device_name, dev->hdr.name);
+ pm_strcpy(device_name, dev->name());
bash_spaces(device_name);
- bnet_fsend(sd, query_device, device_name.c_str());
+ sd->fsend(query_device, device_name.c_str());
Dmsg1(100, ">stored: %s\n", sd->msg);
/* The data is returned through Device_update */
if (bget_dirmsg(sd) <= 0) {
}
#endif
+static char OKbootstrap[] = "3000 OK bootstrap\n";
+
/*
* Start a job with the Storage daemon
*/
-bool start_storage_daemon_job(JCR *jcr, alist *rstore, alist *wstore)
+bool start_storage_daemon_job(JCR *jcr, alist *rstore, alist *wstore, bool send_bsr)
{
bool ok = true;
STORE *storage;
POOL_MEM job_name, client_name, fileset_name;
int copy = 0;
int stripe = 0;
- char ed1[30];
+ char ed1[30], ed2[30];
sd = jcr->store_bsock;
/*
* Now send JobId and permissions, and get back the authorization key.
*/
- pm_strcpy(job_name, jcr->job->hdr.name);
+ pm_strcpy(job_name, jcr->job->name());
bash_spaces(job_name);
- pm_strcpy(client_name, jcr->client->hdr.name);
+ pm_strcpy(client_name, jcr->client->name());
bash_spaces(client_name);
- pm_strcpy(fileset_name, jcr->fileset->hdr.name);
+ pm_strcpy(fileset_name, jcr->fileset->name());
bash_spaces(fileset_name);
if (jcr->fileset->MD5[0] == 0) {
bstrncpy(jcr->fileset->MD5, "**Dummy**", sizeof(jcr->fileset->MD5));
* for the same jobid.
*/
if (jcr->reschedule_count) {
- bnet_fsend(sd, "cancel Job=%s\n", jcr->Job);
- while (bnet_recv(sd) >= 0)
+ sd->fsend("cancel Job=%s\n", jcr->Job);
+ while (sd->recv() >= 0)
{ }
}
- bnet_fsend(sd, jobcmd, edit_int64(jcr->JobId, ed1), jcr->Job,
- job_name.c_str(), client_name.c_str(),
- jcr->JobType, jcr->JobLevel,
- fileset_name.c_str(), !jcr->pool->catalog_files,
- jcr->job->SpoolAttributes, jcr->fileset->MD5, jcr->spool_data,
- jcr->write_part_after_job, jcr->job->PreferMountedVolumes);
- Dmsg1(100, ">stored: %s\n", sd->msg);
+ sd->fsend(jobcmd, edit_int64(jcr->JobId, ed1), jcr->Job,
+ job_name.c_str(), client_name.c_str(),
+ jcr->get_JobType(), jcr->get_JobLevel(),
+ fileset_name.c_str(), !jcr->pool->catalog_files,
+ jcr->job->SpoolAttributes, jcr->fileset->MD5, jcr->spool_data,
+ jcr->write_part_after_job, jcr->job->PreferMountedVolumes,
+ edit_int64(jcr->spool_size, ed2));
+ Dmsg1(100, ">stored: %s", sd->msg);
if (bget_dirmsg(sd) > 0) {
Dmsg1(100, "<stored: %s", sd->msg);
if (sscanf(sd->msg, OKjob, &jcr->VolSessionId,
&jcr->VolSessionTime, &auth_key) != 3) {
Dmsg1(100, "BadJob=%s\n", sd->msg);
Jmsg(jcr, M_FATAL, 0, _("Storage daemon rejected Job command: %s\n"), sd->msg);
- return 0;
+ return false;
} else {
jcr->sd_auth_key = bstrdup(auth_key);
Dmsg1(150, "sd_auth_key=%s\n", jcr->sd_auth_key);
}
} else {
Jmsg(jcr, M_FATAL, 0, _("<stored: bad response to Job command: %s\n"),
- bnet_strerror(sd));
- return 0;
+ sd->bstrerror());
+ return false;
}
- pm_strcpy(pool_type, jcr->pool->pool_type);
- pm_strcpy(pool_name, jcr->pool->hdr.name);
- bash_spaces(pool_type);
- bash_spaces(pool_name);
+ if (send_bsr && (!send_bootstrap_file(jcr, sd) ||
+ !response(jcr, sd, OKbootstrap, "Bootstrap", DISPLAY_ERROR))) {
+ return false;
+ }
/*
* We have two loops here. The first comes from the
*/
/* Do read side of storage daemon */
if (ok && rstore) {
+ /* For the moment, only migrate, copy and vbackup have rpool */
+ if (jcr->get_JobType() == JT_MIGRATE || jcr->get_JobType() == JT_COPY ||
+ (jcr->get_JobType() == JT_BACKUP && jcr->get_JobLevel() == L_VIRTUAL_FULL)) {
+ pm_strcpy(pool_type, jcr->rpool->pool_type);
+ pm_strcpy(pool_name, jcr->rpool->name());
+ } else {
+ pm_strcpy(pool_type, jcr->pool->pool_type);
+ pm_strcpy(pool_name, jcr->pool->name());
+ }
+ bash_spaces(pool_type);
+ bash_spaces(pool_name);
foreach_alist(storage, rstore) {
Dmsg1(100, "Rstore=%s\n", storage->name());
pm_strcpy(store_name, storage->name());
bash_spaces(store_name);
pm_strcpy(media_type, storage->media_type);
bash_spaces(media_type);
- bnet_fsend(sd, use_storage, store_name.c_str(), media_type.c_str(),
- pool_name.c_str(), pool_type.c_str(), 0, copy, stripe);
-
+ sd->fsend(use_storage, store_name.c_str(), media_type.c_str(),
+ pool_name.c_str(), pool_type.c_str(), 0, copy, stripe);
+ Dmsg1(100, "rstore >stored: %s", sd->msg);
DEVICE *dev;
/* Loop over alternative storage Devices until one is OK */
foreach_alist(dev, storage->device) {
- pm_strcpy(device_name, dev->hdr.name);
+ pm_strcpy(device_name, dev->name());
bash_spaces(device_name);
- bnet_fsend(sd, use_device, device_name.c_str());
+ sd->fsend(use_device, device_name.c_str());
Dmsg1(100, ">stored: %s", sd->msg);
}
- bnet_sig(sd, BNET_EOD); /* end of Devices */
+ sd->signal(BNET_EOD); /* end of Devices */
}
- bnet_sig(sd, BNET_EOD); /* end of Storages */
+ sd->signal(BNET_EOD); /* end of Storages */
if (bget_dirmsg(sd) > 0) {
Dmsg1(100, "<stored: %s", sd->msg);
/* ****FIXME**** save actual device name */
/* Do write side of storage daemon */
if (ok && wstore) {
+ pm_strcpy(pool_type, jcr->pool->pool_type);
+ pm_strcpy(pool_name, jcr->pool->name());
+ bash_spaces(pool_type);
+ bash_spaces(pool_name);
foreach_alist(storage, wstore) {
- Dmsg1(100, "Wstore=%s\n", storage->name());
pm_strcpy(store_name, storage->name());
bash_spaces(store_name);
pm_strcpy(media_type, storage->media_type);
bash_spaces(media_type);
- bnet_fsend(sd, use_storage, store_name.c_str(), media_type.c_str(),
- pool_name.c_str(), pool_type.c_str(), 1, copy, stripe);
+ sd->fsend(use_storage, store_name.c_str(), media_type.c_str(),
+ pool_name.c_str(), pool_type.c_str(), 1, copy, stripe);
+ Dmsg1(100, "wstore >stored: %s", sd->msg);
DEVICE *dev;
/* Loop over alternative storage Devices until one is OK */
foreach_alist(dev, storage->device) {
- pm_strcpy(device_name, dev->hdr.name);
+ pm_strcpy(device_name, dev->name());
bash_spaces(device_name);
- bnet_fsend(sd, use_device, device_name.c_str());
+ sd->fsend(use_device, device_name.c_str());
Dmsg1(100, ">stored: %s", sd->msg);
}
- bnet_sig(sd, BNET_EOD); /* end of Devices */
+ sd->signal(BNET_EOD); /* end of Devices */
}
- bnet_sig(sd, BNET_EOD); /* end of Storages */
+ sd->signal(BNET_EOD); /* end of Storages */
if (bget_dirmsg(sd) > 0) {
Dmsg1(100, "<stored: %s", sd->msg);
/* ****FIXME**** save actual device name */
" Storage daemon didn't accept Device \"%s\" command.\n"),
device_name.c_str());
}
+ } else {
+ Jmsg(jcr, M_INFO, 0, _("Using Device \"%s\"\n"), device_name.c_str());
}
return ok;
}
* Start a thread to handle Storage daemon messages and
* Catalog requests.
*/
-int start_storage_daemon_message_thread(JCR *jcr)
+bool start_storage_daemon_message_thread(JCR *jcr)
{
int status;
pthread_t thid;
Dmsg0(100, "Start SD msg_thread.\n");
if ((status=pthread_create(&thid, NULL, msg_thread, (void *)jcr)) != 0) {
berrno be;
- Jmsg1(jcr, M_ABORT, 0, _("Cannot create message thread: %s\n"), be.strerror(status));
+ Jmsg1(jcr, M_ABORT, 0, _("Cannot create message thread: %s\n"), be.bstrerror(status));
}
/* Wait for thread to start */
while (jcr->SD_msg_chan == 0) {
bmicrosleep(0, 50);
+ if (job_canceled(jcr) || jcr->sd_msg_thread_done) {
+ return false;
+ }
}
Dmsg1(100, "SD msg_thread started. use=%d\n", jcr->use_count());
- return 1;
+ return true;
}
extern "C" void msg_thread_cleanup(void *arg)
jcr->sd_msg_thread_done = true;
jcr->SD_msg_chan = 0;
pthread_cond_broadcast(&jcr->term_wait); /* wakeup any waiting threads */
- Dmsg1(100, "=== End msg_thread. use=%d\n", jcr->use_count());
+ Dmsg2(100, "=== End msg_thread. JobId=%d usecnt=%d\n", jcr->JobId, jcr->use_count());
free_jcr(jcr); /* release jcr */
+ db_thread_cleanup(); /* remove thread specific data */
}
/*
BSOCK *sd;
int JobStatus;
char Job[MAX_NAME_LENGTH];
- uint32_t JobFiles;
+ uint32_t JobFiles, JobErrors;
uint64_t JobBytes;
- int stat;
pthread_detach(pthread_self());
+ set_jcr_in_tsd(jcr);
jcr->SD_msg_chan = pthread_self();
pthread_cleanup_push(msg_thread_cleanup, arg);
sd = jcr->store_bsock;
if (sscanf(sd->msg, Job_start, Job) == 1) {
continue;
}
- if ((stat=sscanf(sd->msg, Job_end, Job, &JobStatus, &JobFiles,
- &JobBytes)) == 4) {
+ if (sscanf(sd->msg, Job_end, Job, &JobStatus, &JobFiles,
+ &JobBytes, &JobErrors) == 5) {
jcr->SDJobStatus = JobStatus; /* termination status */
jcr->SDJobFiles = JobFiles;
jcr->SDJobBytes = JobBytes;
+ jcr->SDErrors = JobErrors;
break;
}
- Dmsg2(400, "end loop stat=%d use=%d\n", stat, jcr->use_count());
+ Dmsg1(400, "end loop use=%d\n", jcr->use_count());
}
if (is_bnet_error(sd)) {
jcr->SDJobStatus = JS_ErrorTerminated;
V(mutex);
if (job_canceled(jcr)) {
if (jcr->SD_msg_chan) {
- jcr->store_bsock->timed_out = 1;
- jcr->store_bsock->terminated = 1;
+ jcr->store_bsock->set_timed_out();
+ jcr->store_bsock->set_terminated();
Dmsg2(400, "kill jobid=%d use=%d\n", (int)jcr->JobId, jcr->use_count());
pthread_kill(jcr->SD_msg_chan, TIMEOUT_SIGNAL);
}
LockRes();
foreach_res(dev, R_DEVICE) {
if (!update_device_res(jcr, dev)) {
- Dmsg1(900, "Error updating device=%s\n", dev->hdr.name);
+ Dmsg1(900, "Error updating device=%s\n", dev->name());
} else {
- Dmsg1(900, "Updated Device=%s\n", dev->hdr.name);
+ Dmsg1(900, "Updated Device=%s\n", dev->name());
}
}
UnlockRes();
Dmsg0(100, "Start Device thread.\n");
if ((status=pthread_create(&thid, NULL, device_thread, NULL)) != 0) {
berrno be;
- Jmsg1(NULL, M_ABORT, 0, _("Cannot create message thread: %s\n"), be.strerror(status));
+ Jmsg1(NULL, M_ABORT, 0, _("Cannot create message thread: %s\n"), be.bstrerror(status));
}
}
#endif