2 * Subroutines to handle Catalog requests sent to the Director
3 * Requests/commands from the Director are handled in dircmd.c
5 * Kern Sibbald, December 2000
10 Copyright (C) 2000-2003 Kern Sibbald and John Walker
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of
15 the License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public
23 License along with this program; if not, write to the Free
24 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
29 #include "bacula.h" /* pull in global headers */
30 #include "stored.h" /* pull in Storage Deamon headers */
/*
 * Message templates for the catalog conversation with the Director.
 * The request templates are filled in by bnet_fsend() or sprintf() in
 * the functions below; the response templates are matched against
 * dir->msg after bnet_recv().
 */
32 /* Requests sent to the Director */
/* Sent by dir_find_next_appendable_volume(), which passes FindMedia=1 */
33 static char Find_media[] = "CatReq Job=%s FindMedia=%d\n";
/* Sent by dir_get_volume_info(); the name is sent after bash_spaces() */
34 static char Get_Vol_Info[] = "CatReq Job=%s GetVolInfo VolName=%s write=%d\n";
/*
 * Sent by dir_update_volume_info().  VolBytes and MaxVolBytes are %s
 * because the caller formats them with edit_uint64().
 * NOTE(review): EndTime=%d expects an int argument -- confirm the caller
 * does not hand a wider time_t to this conversion.
 */
35 static char Update_media[] = "CatReq Job=%s UpdateMedia VolName=%s\
36 VolJobs=%u VolFiles=%u VolBlocks=%u VolBytes=%s VolMounts=%u\
37 VolErrors=%u VolWrites=%u MaxVolBytes=%s EndTime=%d VolStatus=%s\
38 Slot=%d relabel=%d\n";
/* Sent by dir_create_jobmedia_record() */
40 static char Create_job_media[] = "CatReq Job=%s CreateJobMedia \
41 FirstIndex=%u LastIndex=%u StartFile=%u EndFile=%u \
42 StartBlock=%u EndBlock=%u\n";
/* Text prefix that dir_update_file_attributes() writes ahead of the serialized record */
43 static char FileAttributes[] = "UpdCat Job=%s FileAttributes ";
/* Sent by dir_send_job_status() */
44 static char Job_status[] = "3012 Job %s jobstatus %d\n";
47 /* Responses received from the Director */
/*
 * sscanf() template used by do_request_volume_info(), which requires
 * all 14 conversions to match.
 * NOTE(review): %127s and %20s can store 128 and 21 bytes once the
 * terminator is counted -- confirm VolCatName and VolCatStatus are at
 * least that large.
 */
48 static char OK_media[] = "1000 OK VolName=%127s VolJobs=%u VolFiles=%u\
49 VolBlocks=%u VolBytes=%" lld " VolMounts=%u VolErrors=%u VolWrites=%u\
50 MaxVolBytes=%" lld " VolCapacityBytes=%" lld " VolStatus=%20s\
51 Slot=%d MaxVolJobs=%u MaxVolFiles=%u\n";
/* Exact reply (compared with strcmp) expected by the UpdateMedia and CreateJobMedia requests */
52 static char OK_update[] = "1000 OK UpdateMedia\n";
54 /* Forward referenced functions */
/* Used by both dir_ask_sysop_to_mount_* routines to sleep on the device */
55 static int wait_for_sysop(JCR *jcr, DEVICE *dev, int wait_sec);
58 * Send current JobStatus to Director
60 int dir_send_job_status(JCR *jcr)
62 return bnet_fsend(jcr->dir_bsock, Job_status, jcr->Job, jcr->JobStatus);
67 * dir_get_volume_info()
69 * dir_find_next_appendable_volume()
/*
 * Read the Director reply to a volume request and store it in the jcr.
 *
 * Clears jcr->VolumeName, receives one message on jcr->dir_bsock and
 * parses it with the OK_media template.  When all 14 fields are parsed,
 * unbash_spaces() is applied to the volume name, the name is copied to
 * jcr->VolumeName and the parsed record is copied over jcr->VolCatInfo.
 * A receive failure or a reply that does not parse leaves an
 * explanation in jcr->errmsg.
 *
 * Returns: presumably 1 on success and 0 on either failure (callers
 *          test the result for truth) -- TODO confirm.
 */
71 static int do_request_volume_info(JCR *jcr)
73 BSOCK *dir = jcr->dir_bsock;
76 jcr->VolumeName[0] = 0; /* No volume */
/* Any non-positive bnet_recv() result is handled as a network error */
77 if (bnet_recv(dir) <= 0) {
78 Dmsg0(200, "getvolname error bnet_recv\n");
79 Mmsg(&jcr->errmsg, _("Network error on bnet_recv in req_vol_info.\n"));
/* The reply is accepted only when every one of the 14 conversions matched */
82 if (sscanf(dir->msg, OK_media, vol.VolCatName,
83 &vol.VolCatJobs, &vol.VolCatFiles,
84 &vol.VolCatBlocks, &vol.VolCatBytes,
85 &vol.VolCatMounts, &vol.VolCatErrors,
86 &vol.VolCatWrites, &vol.VolCatMaxBytes,
87 &vol.VolCatCapacityBytes, vol.VolCatStatus,
88 &vol.Slot, &vol.VolCatMaxJobs, &vol.VolCatMaxFiles) != 14) {
90 Dmsg1(200, "Bad response from Dir: %s\n", dir->msg);
91 Mmsg(&jcr->errmsg, _("Error scanning Dir response: %s\n"), dir->msg);
94 unbash_spaces(vol.VolCatName);
95 pm_strcpy(&jcr->VolumeName, vol.VolCatName); /* set desired VolumeName */
/*
 * NOTE(review): the whole record is copied, including any members that
 * sscanf() did not fill -- confirm vol is fully initialized beforehand.
 */
96 memcpy(&jcr->VolCatInfo, &vol, sizeof(jcr->VolCatInfo));
98 Dmsg2(200, "do_reqest_vol_info got slot=%d Volume=%s\n",
99 vol.Slot, vol.VolCatName);
105 * Get Volume info for a specific volume from the Director's Database
107 * Returns: 1 on success (not Director guarantees that Pool and MediaType
108 * are correct and VolStatus==Append or
109 * VolStatus==Recycle)
112 * Volume information returned in jcr
114 int dir_get_volume_info(JCR *jcr, int writing)
116 BSOCK *dir = jcr->dir_bsock;
118 bstrncpy(jcr->VolCatInfo.VolCatName, jcr->VolumeName, sizeof(jcr->VolCatInfo.VolCatName));
119 Dmsg1(200, "dir_get_volume_info=%s\n", jcr->VolCatInfo.VolCatName);
120 bash_spaces(jcr->VolCatInfo.VolCatName);
121 bnet_fsend(dir, Get_Vol_Info, jcr->Job, jcr->VolCatInfo.VolCatName, writing);
122 return do_request_volume_info(jcr);
128 * Get info on the next appendable volume in the Director's database
129 * Returns: 1 on success
132 * Volume information returned in jcr
135 int dir_find_next_appendable_volume(JCR *jcr)
137 BSOCK *dir = jcr->dir_bsock;
139 Dmsg0(200, "dir_find_next_appendable_volume\n");
140 bnet_fsend(dir, Find_media, jcr->Job, 1);
141 return do_request_volume_info(jcr);
146 * After writing a Volume, send the updated statistics
147 * back to the director.
149 int dir_update_volume_info(JCR *jcr, VOLUME_CAT_INFO *vol, int relabel)
151 BSOCK *dir = jcr->dir_bsock;
152 time_t EndTime = time(NULL);
153 char ed1[50], ed2[50];
155 if (vol->VolCatName[0] == 0) {
156 Jmsg0(jcr, M_ERROR, 0, _("NULL Volume name. This shouldn't happen!!!\n"));
159 bash_spaces(vol->VolCatName);
160 bnet_fsend(dir, Update_media, jcr->Job,
161 vol->VolCatName, vol->VolCatJobs, vol->VolCatFiles,
162 vol->VolCatBlocks, edit_uint64(vol->VolCatBytes, ed1),
163 vol->VolCatMounts, vol->VolCatErrors,
164 vol->VolCatWrites, edit_uint64(vol->VolCatMaxBytes, ed2),
165 EndTime, vol->VolCatStatus, vol->Slot, relabel);
166 Dmsg1(120, "update_volume_data(): %s", dir->msg);
167 unbash_spaces(vol->VolCatName);
168 if (bnet_recv(dir) <= 0) {
169 Dmsg0(190, "updateVolCatInfo error bnet_recv\n");
170 Jmsg(jcr, M_ERROR, 0, _("Error updating Volume info Vol=\"%s\": ERR=%s\n"),
171 vol->VolCatName, bnet_strerror(dir));
174 Dmsg1(120, "Updatevol: %s", dir->msg);
175 if (strcmp(dir->msg, OK_update) != 0) {
176 Dmsg1(130, "Bad response from Dir: %s\n", dir->msg);
177 Jmsg(jcr, M_ERROR, 0, _("Error updating Volume info Vol=\"%s\": %s\n"),
178 vol->VolCatName, dir->msg);
185 * After writing a Volume, create the JobMedia record.
187 int dir_create_jobmedia_record(JCR *jcr)
189 BSOCK *dir = jcr->dir_bsock;
191 if (jcr->VolFirstIndex == 0) {
192 return 1; /* nothing written to tape */
195 bnet_fsend(dir, Create_job_media, jcr->Job,
196 jcr->VolFirstIndex, jcr->JobFiles,
197 jcr->StartFile, jcr->EndFile,
198 jcr->StartBlock, jcr->EndBlock);
199 Dmsg1(100, "create_jobmedia(): %s", dir->msg);
200 if (bnet_recv(dir) <= 0) {
201 Dmsg0(190, "create_jobmedia error bnet_recv\n");
202 Jmsg(jcr, M_ERROR, 0, _("Error creating JobMedia record: ERR=%s\n"),
206 Dmsg1(120, "Create_jobmedia: %s", dir->msg);
207 if (strcmp(dir->msg, OK_update) != 0) {
208 Dmsg1(130, "Bad response from Dir: %s\n", dir->msg);
209 Jmsg(jcr, M_ERROR, 0, _("Error creating JobMedia record: %s\n"), dir->msg);
217 * Update File Attribute data
/*
 * Send one record of file attribute data to the Director.
 *
 * The message is built directly in dir->msg: first the FileAttributes
 * text prefix with the job name, then the record fields serialized in
 * this order: VolSessionId, VolSessionTime, FileIndex, Stream,
 * data_len, and finally data_len bytes of rec->data.
 *
 * Returns: the value returned by bnet_send().
 *
 * NOTE(review): sprintf() writes the prefix before the buffer size is
 * checked -- confirm dir->msg is always large enough for the prefix
 * plus the job name at this point.
 */
219 int dir_update_file_attributes(JCR *jcr, DEV_RECORD *rec)
221 BSOCK *dir = jcr->dir_bsock;
224 dir->msglen = sprintf(dir->msg, FileAttributes, jcr->Job);
/*
 * Make room for the prefix, the record data and sizeof(DEV_RECORD)
 * extra bytes (presumably an upper bound for the five header fields).
 * The buffer pointer is reassigned from the return value.
 */
225 dir->msg = check_pool_memory_size(dir->msg, dir->msglen +
226 sizeof(DEV_RECORD) + rec->data_len);
/* Serialization starts right after the text prefix */
227 ser_begin(dir->msg + dir->msglen, 0);
228 ser_uint32(rec->VolSessionId);
229 ser_uint32(rec->VolSessionTime);
230 ser_int32(rec->FileIndex);
231 ser_int32(rec->Stream);
232 ser_uint32(rec->data_len);
233 ser_bytes(rec->data, rec->data_len);
/* Presumably measured from the start of dir->msg, so the prefix is included -- confirm */
234 dir->msglen = ser_length(dir->msg);
235 return bnet_send(dir);
241 * Entered with device blocked.
242 * Leaves with device blocked.
244 * Returns: 1 on success (operator issues a mount command)
246 * Note, must create dev->errmsg on error return.
248 * On success, jcr->VolumeName and jcr->VolCatInfo contain
249 * information on suggested volume, but this may not be the
250 * same as what is actually mounted.
252 * When we return with success, the correct tape may or may not
253 * actually be mounted. The calling routine must read it and
/*
 * Ask the operator to mount the next appendable Volume and wait for it.
 *
 * The device must already be blocked (asserted).  The routine checks
 * for job cancellation, asks the Director for a suggested volume with
 * dir_find_next_appendable_volume(), issues an M_MOUNT message, reports
 * the waiting status (JS_WaitMount or JS_WaitMedia) to the Director and
 * sleeps in wait_for_sysop().  After an ETIMEDOUT the wait is doubled,
 * bounded by max_wait, and once max_num_wait waits have been used the
 * job is failed with M_FATAL and 0 is returned.  On the normal way out
 * the job status is set to JS_Running and reported to the Director.
 *
 * dev->errmsg is filled in before each M_FATAL message shown here.
 */
256 int dir_ask_sysop_to_mount_next_volume(JCR *jcr, DEVICE *dev)
259 /* ******FIXME******* put these on config variable */
/* 60*60 is one hour and 24*60*60 is one day, in seconds; max_wait bounds wait_sec below */
260 int min_wait = 60 * 60;
261 int max_wait = 24 * 60 * 60;
262 int max_num_wait = 9; /* 5 waits =~ 1 day, then 1 day at a time */
267 Dmsg0(130, "enter dir_ask_sysop_to_mount_next_volume\n");
268 ASSERT(dev->dev_blocked);
/* A canceled job is reported through dev->errmsg and an M_FATAL message */
271 if (job_canceled(jcr)) {
272 Mmsg(&dev->errmsg, _("Job %s canceled while waiting for mount on Storage Device \"%s\".\n"),
273 jcr->Job, jcr->dev_name);
274 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
277 if (dir_find_next_appendable_volume(jcr)) { /* get suggested volume */
278 jstat = JS_WaitMount;
280 * If we have a valid volume name and we are not
281 * removable media, return now, otherwise wait
282 * for the operator to mount the media.
284 if (jcr->VolumeName[0] && !dev_cap(dev, CAP_REM) && dev_cap(dev, CAP_LABEL)) {
285 Dmsg0(190, "Return 1 from mount without wait.\n");
288 Jmsg(jcr, M_MOUNT, 0, _(
289 "Please mount Volume \"%s\" on Storage Device \"%s\" for Job %s\n"
290 "Use \"mount\" command to release Job.\n"),
291 jcr->VolumeName, jcr->dev_name, jcr->Job);
292 Dmsg3(190, "Mount %s on %s for Job %s\n",
293 jcr->VolumeName, jcr->dev_name, jcr->Job);
/* The Director suggested nothing: the operator is asked to label a new Volume */
295 jstat = JS_WaitMedia;
296 Jmsg(jcr, M_MOUNT, 0, _(
297 "Job %s waiting. Cannot find any appendable volumes.\n\
298 Please use the \"label\" command to create a new Volume for:\n\
308 jcr->JobStatus = jstat;
309 dir_send_job_status(jcr);
311 stat = wait_for_sysop(jcr, dev, wait_sec);
313 if (stat == ETIMEDOUT) {
314 wait_sec *= 2; /* double wait time */
315 if (wait_sec > max_wait) { /* but not longer than maxtime */
319 if (num_wait >= max_num_wait) {
320 Mmsg(&dev->errmsg, _("Gave up waiting to mount Storage Device \"%s\" for Job %s\n"),
321 jcr->dev_name, jcr->Job);
322 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
323 Dmsg1(190, "Gave up waiting on device %s\n", dev->dev_name);
324 return 0; /* exceeded maximum waits */
328 if (stat == EINVAL) {
329 Mmsg2(&dev->errmsg, _("pthread error in mount_next_volume stat=%d ERR=%s\n"),
330 stat, strerror(stat));
331 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
335 Jmsg(jcr, M_WARNING, 0, _("pthread error in mount_next_volume stat=%d ERR=%s\n"), stat,
338 Dmsg1(190, "Someone woke me for device %s\n", dev->dev_name);
340 /* Restart wait counters */
343 /* If no VolumeName, and cannot get one, try again */
344 if (jcr->VolumeName[0] == 0 &&
345 !dir_find_next_appendable_volume(jcr)) {
346 Jmsg(jcr, M_MOUNT, 0, _(
347 "Someone woke me up, but I cannot find any appendable\n\
348 volumes for Job=%s.\n"), jcr->Job);
353 set_jcr_job_status(jcr, JS_Running);
354 dir_send_job_status(jcr);
355 Dmsg0(130, "leave dir_ask_sysop_to_mount_next_volume\n");
361 * Entered with device blocked and jcr->VolumeName is desired
363 * Leaves with device blocked.
365 * Returns: 1 on success (operator issues a mount command)
367 * Note, must create dev->errmsg on error return.
/*
 * Ask the operator to mount the Volume named in jcr->VolumeName and wait.
 *
 * An empty jcr->VolumeName is rejected at once with a message in
 * dev->errmsg.  Otherwise the device must already be blocked (asserted);
 * the routine checks for job cancellation, issues an M_MOUNT message
 * naming the volume, reports JS_WaitMount to the Director and sleeps in
 * wait_for_sysop().  After an ETIMEDOUT the wait is doubled, bounded by
 * max_wait, and once max_num_wait waits have been used the job is failed
 * with M_FATAL and 0 is returned.  On the normal way out the job status
 * is set to JS_Running and reported to the Director.
 *
 * NOTE(review): the enter/leave debug strings and the second pthread
 * error message below still say mount_next_volume; they look copied
 * from dir_ask_sysop_to_mount_next_volume() -- confirm and correct.
 */
370 int dir_ask_sysop_to_mount_volume(JCR *jcr, DEVICE *dev)
373 /* ******FIXME******* put these on config variable */
/* 60*60 is one hour and 24*60*60 is one day, in seconds; max_wait bounds wait_sec below */
374 int min_wait = 60 * 60;
375 int max_wait = 24 * 60 * 60;
376 int max_num_wait = 9; /* 5 waits =~ 1 day, then 1 day at a time */
381 Dmsg0(130, "enter dir_ask_sysop_to_mount_next_volume\n");
/* Without a volume name there is nothing to ask the operator for */
382 if (!jcr->VolumeName[0]) {
383 Mmsg0(&dev->errmsg, _("Cannot request another volume: no volume name given.\n"));
386 ASSERT(dev->dev_blocked);
389 if (job_canceled(jcr)) {
390 Mmsg(&dev->errmsg, _("Job %s canceled while waiting for mount on Storage Device \"%s\".\n"),
391 jcr->Job, jcr->dev_name);
/* msg is only the leading phrase; it fills the first %s of the operator message */
394 msg = _("Please mount");
395 Jmsg(jcr, M_MOUNT, 0, _("%s Volume \"%s\" on Storage Device \"%s\" for Job %s\n"),
396 msg, jcr->VolumeName, jcr->dev_name, jcr->Job);
397 Dmsg3(190, "Mount %s on %s for Job %s\n",
398 jcr->VolumeName, jcr->dev_name, jcr->Job);
/* Let the Director know the job is waiting before going to sleep */
400 jcr->JobStatus = JS_WaitMount;
401 dir_send_job_status(jcr);
403 stat = wait_for_sysop(jcr, dev, wait_sec); /* wait on device */
405 if (stat == ETIMEDOUT) {
406 wait_sec *= 2; /* double wait time */
407 if (wait_sec > max_wait) { /* but not longer than maxtime */
411 if (num_wait >= max_num_wait) {
412 Mmsg(&dev->errmsg, _("Gave up waiting to mount Storage Device \"%s\" for Job %s\n"),
413 jcr->dev_name, jcr->Job);
414 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
415 Dmsg1(190, "Gave up waiting on device %s\n", dev->dev_name);
416 return 0; /* exceeded maximum waits */
/* EINVAL from the wait is reported as fatal; other statuses get the M_ERROR message further down */
420 if (stat == EINVAL) {
421 Mmsg2(&dev->errmsg, _("pthread error in mount_volume stat=%d ERR=%s\n"),
422 stat, strerror(stat));
423 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
427 Jmsg(jcr, M_ERROR, 0, _("pthread error in mount_next_volume stat=%d ERR=%s\n"), stat,
430 Dmsg1(190, "Someone woke me for device %s\n", dev->dev_name);
432 /* Restart wait counters */
437 set_jcr_job_status(jcr, JS_Running);
438 dir_send_job_status(jcr);
439 Dmsg0(130, "leave dir_ask_sysop_to_mount_next_volume\n");
444 * Wait for SysOp to mount a tape
/*
 * Sleep on the device until it is signaled or wait_sec has elapsed.
 *
 * The sleep uses pthread_cond_timedwait() on dev->wait_next_vol with
 * dev->mutex.  When me->heartbeat_interval is set, each individual
 * sleep is capped at that interval and, between sleeps, a
 * BNET_HEARTBEAT signal is sent on jcr->file_bsock and jcr->dir_bsock
 * when those sockets exist.  While waiting, dev->dev_blocked is set to
 * BST_WAITING_FOR_SYSOP; the value found on entry is saved and written
 * back at the end.  The loop stops when the job is canceled, when the
 * condition wait returns anything other than ETIMEDOUT, or when
 * wait_sec seconds have passed since entry.
 *
 * Returns: presumably the last pthread_cond_timedwait() status (the
 *          callers compare the result with ETIMEDOUT and EINVAL)
 *          -- TODO confirm.
 */
446 static int wait_for_sysop(JCR *jcr, DEVICE *dev, int wait_sec)
450 struct timespec timeout;
452 time_t start = time(NULL);
453 time_t last_heartbeat = 0;
458 * Wait requested time (wait_sec). However, we also wake up every
459 * HB_TIME seconds and send a heartbeat to the FD and the Director
460 * to keep stateful firewalls from closing them down while waiting
/* Cap the first sleep at the heartbeat interval when one is configured */
464 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
465 add_wait = me->heartbeat_interval;
/* Absolute deadline for the condition wait: current time plus add_wait seconds */
467 gettimeofday(&tv, &tz);
468 timeout.tv_nsec = tv.tv_usec * 1000;
469 timeout.tv_sec = tv.tv_sec + add_wait;
/* Remember the blocked state found on entry; it is written back before leaving */
472 dev_blocked = dev->dev_blocked;
473 dev->dev_blocked = BST_WAITING_FOR_SYSOP; /* indicate waiting for mount */
475 for ( ; !job_canceled(jcr); ) {
478 Dmsg3(100, "I'm going to sleep on device %s. HB=%d wait=%d\n", dev->dev_name,
479 (int)me->heartbeat_interval, wait_sec);
480 stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex, &timeout);
481 Dmsg1(100, "Wokeup from sleep on device stat=%d\n", stat);
485 /* Note, this always triggers the first time. We want that. */
486 if (me->heartbeat_interval) {
487 if (now - last_heartbeat >= me->heartbeat_interval) {
488 /* send heartbeats */
489 if (jcr->file_bsock) {
490 bnet_sig(jcr->file_bsock, BNET_HEARTBEAT);
491 Dmsg0(100, "Send heartbeat to FD.\n");
493 if (jcr->dir_bsock) {
494 bnet_sig(jcr->dir_bsock, BNET_HEARTBEAT);
496 last_heartbeat = now;
500 if (dev->dev_blocked == BST_MOUNT) { /* mount request ? */
505 if (stat != ETIMEDOUT) { /* we blocked the device */
506 break; /* on error return */
508 if (now - start >= wait_sec) { /* on exceeding wait time return */
509 Dmsg0(100, "Exceed wait time.\n");
/* Time still left of the overall wait, again capped at the heartbeat interval */
512 add_wait = wait_sec - (now - start);
513 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
514 add_wait = me->heartbeat_interval;
516 gettimeofday(&tv, &tz);
517 timeout.tv_nsec = tv.tv_usec * 1000;
518 timeout.tv_sec = tv.tv_sec + add_wait; /* additional wait */
519 Dmsg1(100, "Additional wait %d sec.\n", add_wait);
522 dev->dev_blocked = dev_blocked;