1 Index: src/dird/backup.c
2 ===================================================================
3 --- src/dird/backup.c (révision 7736)
4 +++ src/dird/backup.c (copie de travail)
9 -/* Come here only after starting SD thread */
10 +/* Come here only after starting SD thread
11 + * and we don't expect any EndJob message because the
12 + * client has not received the "backup" command.
15 set_jcr_job_status(jcr, JS_ErrorTerminated);
16 - Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
18 - wait_for_job_termination(jcr, FDConnectTimeout);
19 - Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
20 + Dmsg1(400, "wait for sd and fd. use=%d\n", jcr->use_count());
21 + /* Get status from SD and FD */
22 + wait_for_job_termination(jcr, false /* don't expect EndJob message*/);
23 + Dmsg1(400, "after wait for sd and fd. use=%d\n", jcr->use_count());
29 * Here we wait for the File daemon to signal termination,
30 * then we wait for the Storage daemon. When both
31 * are done, we return the job status.
32 * Also used by restore.c
34 -int wait_for_job_termination(JCR *jcr, int timeout)
35 +int wait_for_job_termination(JCR *jcr, bool expect_EndJob)
38 BSOCK *fd = jcr->file_bsock;
40 uint64_t JobBytes = 0;
45 set_jcr_job_status(jcr, JS_Running);
49 - tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
51 - /* Wait for Client to terminate */
52 - while ((n = bget_dirmsg(fd)) >= 0) {
54 - (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
55 - &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 ||
56 - sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
57 - &ReadBytes, &JobBytes, &Errors) == 5)) {
59 - set_jcr_job_status(jcr, jcr->FDJobStatus);
60 - Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
62 - Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
64 + /* Wait for Client to terminate.
65 + * In some conditions, the client isn't able to send
66 + * any messages, and we should not wait indefinitely.
70 + while (OK && expect_EndJob) {
72 + /* Even if the job is canceled, we give the FD a chance to
73 + * send the EndJob message
75 + if (job_canceled(jcr)) {
78 - if (job_canceled(jcr)) {
81 + /* wait a few minutes for data */
82 + ret = fd->wait_data_intr(5*60, 0);
83 + if (ret == 1) { /* get data */
84 + n = bget_dirmsg(fd);
86 + (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
87 + &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 ||
88 + sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
89 + &ReadBytes, &JobBytes, &Errors) == 5)) {
91 + set_jcr_job_status(jcr, jcr->FDJobStatus);
92 + OK=false; /* end of loop */
94 + Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
97 + } /* else get timeout or network error */
99 + if (is_bnet_error(fd)) {
100 + Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
101 + job_type_to_str(jcr->get_JobType()), fd->bstrerror());
106 - stop_bsock_timer(tid);
109 - if (is_bnet_error(fd)) {
110 - Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
111 - job_type_to_str(jcr->get_JobType()), fd->bstrerror());
113 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
116 Index: src/dird/protos.h
117 ===================================================================
118 --- src/dird/protos.h (révision 7736)
119 +++ src/dird/protos.h (copie de travail)
121 extern bool find_recycled_volume(JCR *jcr, bool InChanger, MEDIA_DBR *mr);
124 -extern int wait_for_job_termination(JCR *jcr, int timeout=0);
125 +extern int wait_for_job_termination(JCR *jcr, bool expect_EndJob=true);
126 extern bool do_backup_init(JCR *jcr);
127 extern bool do_backup(JCR *jcr);
128 extern void backup_cleanup(JCR *jcr, int TermCode);