2 * Subroutines to handle Catalog requests sent to the Director
3 * Requests/commands from the Director are handled in dircmd.c
5 * Kern Sibbald, December 2000
10 Copyright (C) 2000-2005 Kern Sibbald
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of
15 the License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public
23 License along with this program; if not, write to the Free
24 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
29 #include "bacula.h" /* pull in global headers */
30 #include "stored.h" /* pull in Storage Deamon headers */
32 /* Requests sent to the Director */
/*
 * printf-style templates for the messages this file sends to the Director.
 * Find_media, Get_Vol_Info, Update_media, Create_job_media and Job_status
 * are passed to bnet_fsend(); FileAttributes is expanded with sprintf()
 * in dir_update_file_attributes().  Every template takes the job name
 * (jcr->Job) as its first argument.
 */
33 static char Find_media[] = "CatReq Job=%s FindMedia=%d\n";
34 static char Get_Vol_Info[] = "CatReq Job=%s GetVolInfo VolName=%s write=%d\n";
/*
 * VolBytes, MaxVolBytes, VolReadTime and VolWriteTime are %s because
 * dir_update_volume_info() converts them to text with edit_uint64()
 * before formatting.
 */
35 static char Update_media[] = "CatReq Job=%s UpdateMedia VolName=%s"
36 " VolJobs=%u VolFiles=%u VolBlocks=%u VolBytes=%s VolMounts=%u"
37 " VolErrors=%u VolWrites=%u MaxVolBytes=%s EndTime=%d VolStatus=%s"
38 " Slot=%d relabel=%d InChanger=%d VolReadTime=%s VolWriteTime=%s"
/*
 * NOTE(review): the final piece of the Update_media template and its
 * terminating ';' are not visible in this view.
 */
40 static char Create_job_media[] = "CatReq Job=%s CreateJobMedia"
41 " FirstIndex=%u LastIndex=%u StartFile=%u EndFile=%u"
42 " StartBlock=%u EndBlock=%u\n";
/* Text header only; the serialized record is appended after it by the sender. */
43 static char FileAttributes[] = "UpdCat Job=%s FileAttributes ";
44 static char Job_status[] = "Status Job=%s JobStatus=%d\n";
47 /* Responses received from the Director */
/*
 * OK_media is the sscanf() pattern that do_get_volume_info() applies to
 * the Director's reply.  It contains 21 conversions, in the same order as
 * the destination arguments of that sscanf() call.  The two string fields
 * carry explicit widths (%127s for VolName, %20s for VolStatus) so the
 * scan cannot store more than that many characters plus the terminator.
 *
 * NOTE(review): `lld` appears outside the string literals, so it is
 * presumably a macro from a project header that expands to a length
 * modifier/conversion string -- confirm.
 */
48 static char OK_media[] = "1000 OK VolName=%127s VolJobs=%u VolFiles=%u"
49 " VolBlocks=%u VolBytes=%" lld " VolMounts=%u VolErrors=%u VolWrites=%u"
50 " MaxVolBytes=%" lld " VolCapacityBytes=%" lld " VolStatus=%20s"
51 " Slot=%d MaxVolJobs=%u MaxVolFiles=%u InChanger=%d"
52 " VolReadTime=%" lld " VolWriteTime=%" lld " EndFile=%u EndBlock=%u"
53 " VolParts=%u LabelType=%d";
/*
 * Exact reply text expected after a Create_job_media request;
 * dir_create_jobmedia_record() compares the received message against
 * it with strcmp(), so the trailing newline is significant.
 */
56 static char OK_create[] = "1000 OK CreateJobMedia\n";
58 /* Forward referenced functions */
/* Defined near the end of this file; used by both dir_ask_sysop_* routines. */
59 static int wait_for_sysop(DCR *dcr);
62 * Send current JobStatus to Director
64 bool dir_send_job_status(JCR *jcr)
/*
 * Formats the Job_status template with jcr->Job and jcr->JobStatus and
 * sends it on the Director socket (jcr->dir_bsock) with bnet_fsend().
 *
 * Returns: the result of bnet_fsend().
 */
66 return bnet_fsend(jcr->dir_bsock, Job_status, jcr->Job, jcr->JobStatus);
71 * dir_get_volume_info()
73 * dir_find_next_appendable_volume()
75 * Returns: true on success and vol info in dcr->VolCatInfo
78 static bool do_get_volume_info(DCR *dcr)
/*
 * Shared reply handler: the caller has already sent a request to the
 * Director; this routine reads one message from the Director socket,
 * parses it with the OK_media pattern into a local `vol` record, and
 * on success copies the result into dcr->VolumeName and
 * dcr->VolCatInfo.
 *
 * dcr->VolumeName is cleared before the read.  On a receive failure or
 * an unparsable reply a message is stored in jcr->errmsg.
 *
 * NOTE(review): the local declarations, the field-count test and the
 * return statements are not visible in this view.
 */
81 BSOCK *dir = jcr->dir_bsock;
86 dcr->VolumeName[0] = 0; /* No volume */
/* bnet_recv() <= 0 is treated as a network error. */
87 if (bnet_recv(dir) <= 0) {
88 Dmsg0(200, "getvolname error bnet_recv\n");
89 Mmsg(jcr->errmsg, _("Network error on bnet_recv in req_vol_info.\n"));
/* Start from a zeroed record so fields the scan does not set are 0. */
92 memset(&vol, 0, sizeof(vol));
93 Dmsg1(300, "Get vol info=%s", dir->msg);
/* n receives the number of fields sscanf() converted. */
94 n = sscanf(dir->msg, OK_media, vol.VolCatName,
95 &vol.VolCatJobs, &vol.VolCatFiles,
96 &vol.VolCatBlocks, &vol.VolCatBytes,
97 &vol.VolCatMounts, &vol.VolCatErrors,
98 &vol.VolCatWrites, &vol.VolCatMaxBytes,
99 &vol.VolCatCapacityBytes, vol.VolCatStatus,
100 &vol.Slot, &vol.VolCatMaxJobs, &vol.VolCatMaxFiles,
101 &InChanger, &vol.VolReadTime, &vol.VolWriteTime,
102 &vol.EndFile, &vol.EndBlock, &vol.VolCatParts,
/*
 * NOTE(review): the end of the sscanf() argument list and the test on n
 * that guards the error branch below are not visible in this view.
 */
105 Dmsg2(100, "Bad response from Dir fields=%d: %s\n", n, dir->msg);
106 Mmsg(jcr->errmsg, _("Error getting Volume info: %s\n"), dir->msg);
/* InChanger was scanned into a separate local via %d, then stored in the record. */
109 vol.InChanger = InChanger; /* bool in structure */
/* presumably reverses the bash_spaces() encoding applied to names on the wire -- confirm */
110 unbash_spaces(vol.VolCatName);
111 bstrncpy(dcr->VolumeName, vol.VolCatName, sizeof(dcr->VolumeName));
/* Publish the whole parsed record to the caller's DCR. */
112 memcpy(&dcr->VolCatInfo, &vol, sizeof(dcr->VolCatInfo));
114 Dmsg2(300, "do_reqest_vol_info got slot=%d Volume=%s\n",
115 vol.Slot, vol.VolCatName);
121 * Get Volume info for a specific volume from the Director's Database
123 * Returns: true on success (not Director guarantees that Pool and MediaType
124 * are correct and VolStatus==Append or
125 * VolStatus==Recycle)
128 * Volume information returned in jcr
130 bool dir_get_volume_info(DCR *dcr, enum get_vol_info_rw writing)
/*
 * dcr     - supplies the volume to look up in dcr->VolumeName; the reply
 *           is stored back into dcr by do_get_volume_info().
 * writing - GET_VOL_INFO_FOR_WRITE makes the request carry write=1;
 *           any other value sends write=0.
 *
 * Returns: the result of do_get_volume_info().
 *
 * NOTE(review): the return value of bnet_fsend() is not checked here;
 * a failed send is only noticed when the following receive fails.
 */
133 BSOCK *dir = jcr->dir_bsock;
/* Work on the copy in VolCatInfo so dcr->VolumeName itself is not bash_spaces()-encoded. */
135 bstrncpy(dcr->VolCatInfo.VolCatName, dcr->VolumeName, sizeof(dcr->VolCatInfo.VolCatName));
136 Dmsg1(300, "dir_get_volume_info=%s\n", dcr->VolCatInfo.VolCatName);
137 bash_spaces(dcr->VolCatInfo.VolCatName);
138 bnet_fsend(dir, Get_Vol_Info, jcr->Job, dcr->VolCatInfo.VolCatName,
139 writing==GET_VOL_INFO_FOR_WRITE?1:0);
140 return do_get_volume_info(dcr);
146 * Get info on the next appendable volume in the Director's database
147 * Returns: true on success
150 * Volume information returned in dcr
153 bool dir_find_next_appendable_volume(DCR *dcr)
/*
 * Sends Find_media requests with an increasing index and parses each
 * reply with do_get_volume_info(), which fills dcr->VolumeName and
 * dcr->VolCatInfo.  Each candidate is then compared against the
 * VolumeName held by other jobs' DCRs; per the in-code comment a
 * candidate that is already in use is skipped in favour of the next.
 *
 * NOTE(review): the job-list iteration, the "is this our own job" test
 * and the return statements are not visible in this view.
 */
156 BSOCK *dir = jcr->dir_bsock;
159 Dmsg0(200, "dir_find_next_appendable_volume\n");
161 * Try the three oldest or most available volumes. Note,
162 * the most available could already be mounted on another
163 * drive, so we continue looking for a not in use Volume.
165 for (int vol_index=1; vol_index < 3; vol_index++) {
/*
 * NOTE(review): with vol_index starting at 1 and the bound "< 3" this
 * loop issues two requests (index 1 and 2), while the comment above
 * speaks of three volumes -- confirm which is intended.
 */
166 bnet_fsend(dir, Find_media, jcr->Job, vol_index);
167 if (do_get_volume_info(dcr)) {
168 Dmsg2(300, "JobId=%d got possible Vol=%s\n", jcr->JobId, dcr->VolumeName);
171 * Walk through all jobs and see if the volume is
172 * already mounted. If so, try a different one.
173 * This would be better done by walking through
179 free_locked_jcr(njcr);
182 Dmsg2(300, "Compare to JobId=%d using Vol=%s\n", njcr->JobId, njcr->dcr->VolumeName);
/*
 * NOTE(review): the trace call above dereferences njcr->dcr, yet the
 * test below allows for njcr->dcr being NULL.  If it can be NULL the
 * trace call faults first -- confirm and guard it if so.
 */
183 if (njcr->dcr && strcmp(dcr->VolumeName, njcr->dcr->VolumeName) == 0) {
185 Dmsg1(400, "Vol in use by JobId=%u\n", njcr->JobId);
186 free_locked_jcr(njcr);
189 free_locked_jcr(njcr);
193 Dmsg0(400, "dir_find_next_appendable_volume return true\n");
194 return true; /* Got good Volume */
197 Dmsg0(200, "No volume info, return false\n");
/*
 * NOTE(review): the trace text below repeats the success message; the
 * statement that follows it is not visible here -- confirm the text
 * matches the value actually returned on this path.
 */
201 Dmsg0(400, "dir_find_next_appendable_volume return true\n");
207 * After writing a Volume, send the updated statistics
208 * back to the director. The information comes from the
211 bool dir_update_volume_info(DCR *dcr, bool label)
/*
 * Sends the device's current volume statistics (dev->VolCatInfo) to the
 * Director in an Update_media request, reads the Director's reply with
 * do_get_volume_info(), and copies the returned record back into
 * dev->VolCatInfo.
 *
 * dcr   - device context; dcr->dev supplies the statistics.
 * label - passed as the relabel= field of the request.
 *
 * An empty volume name, or a device for which can_read() is true, is
 * reported with an M_FATAL job message.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
214 BSOCK *dir = jcr->dir_bsock;
215 DEVICE *dev = dcr->dev;
/* Sent as EndTime= in the request. */
216 time_t LastWritten = time(NULL);
/* Scratch buffers for edit_uint64(); one per 64-bit field in the request. */
217 char ed1[50], ed2[50], ed3[50], ed4[50];
218 VOLUME_CAT_INFO *vol = &dev->VolCatInfo;
222 if (vol->VolCatName[0] == 0) {
223 Jmsg0(jcr, M_FATAL, 0, _("NULL Volume name. This shouldn't happen!!!\n"));
224 Dmsg0(000, "NULL Volume name. This shouldn't happen!!!\n");
227 if (dev->can_read()) {
228 Jmsg0(jcr, M_FATAL, 0, _("Attempt to update_volume_info in read mode!!!\n"));
229 Dmsg0(000, "Attempt to update_volume_info in read mode!!!\n");
233 Dmsg1(300, "Update cat VolFiles=%d\n", dev->file);
234 /* Just labeled or relabeled the tape */
236 bstrncpy(vol->VolCatStatus, "Append", sizeof(vol->VolCatStatus));
/*
 * NOTE(review): the condition guarding these two assignments is not
 * visible here; per the comment above they apply after a (re)label.
 */
237 vol->VolCatBytes = 1; /* indicates tape labeled */
/* Encode a copy of the name so the device's own record is left untouched. */
239 pm_strcpy(VolumeName, vol->VolCatName);
240 bash_spaces(VolumeName);
241 InChanger = vol->InChanger;
/*
 * NOTE(review): LastWritten is a time_t but is formatted by the
 * EndTime=%d conversion in Update_media; where time_t is wider than
 * int this is a format/argument mismatch -- confirm.
 */
242 bnet_fsend(dir, Update_media, jcr->Job,
243 VolumeName.c_str(), vol->VolCatJobs, vol->VolCatFiles,
244 vol->VolCatBlocks, edit_uint64(vol->VolCatBytes, ed1),
245 vol->VolCatMounts, vol->VolCatErrors,
246 vol->VolCatWrites, edit_uint64(vol->VolCatMaxBytes, ed2),
247 LastWritten, vol->VolCatStatus, vol->Slot, label,
248 InChanger, /* bool in structure */
249 edit_uint64(vol->VolReadTime, ed3),
250 edit_uint64(vol->VolWriteTime, ed4),
253 Dmsg1(300, "update_volume_info(): %s", dir->msg);
/* The Director answers with a volume record; parse it into dcr. */
255 if (!do_get_volume_info(dcr)) {
256 Jmsg(jcr, M_FATAL, 0, "%s", jcr->errmsg);
257 Dmsg2(000, "Didn't get vol info vol=%s: ERR=%s",
258 vol->VolCatName, jcr->errmsg);
261 Dmsg1(420, "get_volume_info(): %s", dir->msg);
262 /* Update dev Volume info in case something changed (e.g. expired) */
263 memcpy(&dev->VolCatInfo, &dcr->VolCatInfo, sizeof(dev->VolCatInfo));
268 * After writing a Volume, create the JobMedia record.
270 bool dir_create_jobmedia_record(DCR *dcr)
/*
 * Sends a Create_job_media request describing the index, file and block
 * ranges recorded in dcr, then waits for the Director's reply and
 * requires it to equal OK_create exactly.
 *
 * Returns true without contacting the Director when dcr->WroteVol is
 * false.  A receive failure or an unexpected reply is reported with an
 * M_FATAL job message.
 *
 * NOTE(review): the failure/success return statements are not visible
 * in this view.
 */
273 BSOCK *dir = jcr->dir_bsock;
275 if (!dcr->WroteVol) {
276 return true; /* nothing written to tape */
/* Clear the flag before sending, so it is reset whatever the outcome. */
279 dcr->WroteVol = false;
280 bnet_fsend(dir, Create_job_media, jcr->Job,
281 dcr->VolFirstIndex, dcr->VolLastIndex,
282 dcr->StartFile, dcr->EndFile,
283 dcr->StartBlock, dcr->EndBlock);
284 Dmsg1(400, "create_jobmedia(): %s", dir->msg);
285 if (bnet_recv(dir) <= 0) {
286 Dmsg0(190, "create_jobmedia error bnet_recv\n");
287 Jmsg(jcr, M_FATAL, 0, _("Error creating JobMedia record: ERR=%s\n"),
291 Dmsg1(400, "Create_jobmedia: %s", dir->msg);
/* Exact match required, including the trailing newline in OK_create. */
292 if (strcmp(dir->msg, OK_create) != 0) {
293 Dmsg1(130, "Bad response from Dir: %s\n", dir->msg);
294 Jmsg(jcr, M_FATAL, 0, _("Error creating JobMedia record: %s\n"), dir->msg);
302 * Update File Attribute data
304 bool dir_update_file_attributes(DCR *dcr, DEV_RECORD *rec)
/*
 * Builds one message in dir->msg consisting of the FileAttributes text
 * header followed by the serialized fields of rec (VolSessionId,
 * VolSessionTime, FileIndex, Stream, data_len and the data bytes), and
 * sends it to the Director.
 *
 * Returns: the result of bnet_send().
 */
307 BSOCK *dir = jcr->dir_bsock;
/* NOTE(review): the body of this conditional section is not visible in this view. */
310 #ifdef NO_ATTRIBUTES_TEST
314 dir->msglen = sprintf(dir->msg, FileAttributes, jcr->Job);
/*
 * NOTE(review): the header is written with sprintf() before the buffer
 * is grown below, so this relies on dir->msg already being large
 * enough for the header text -- confirm.
 */
315 dir->msg = check_pool_memory_size(dir->msg, dir->msglen +
316 sizeof(DEV_RECORD) + rec->data_len);
/* Serialization starts right after the text header. */
317 ser_begin(dir->msg + dir->msglen, 0);
318 ser_uint32(rec->VolSessionId);
319 ser_uint32(rec->VolSessionTime);
320 ser_int32(rec->FileIndex);
321 ser_int32(rec->Stream);
322 ser_uint32(rec->data_len);
323 ser_bytes(rec->data, rec->data_len);
/* ser_length() is given the buffer start, so msglen is taken to cover header plus payload -- confirm against the ser_* macros. */
324 dir->msglen = ser_length(dir->msg);
325 return bnet_send(dir);
330 * Request the sysop to create an appendable volume
332 * Entered with device blocked.
333 * Leaves with device blocked.
335 * Returns: true on success (operator issues a mount command)
337 * Note, must create dev->errmsg on error return.
339 * On success, dcr->VolumeName and dcr->VolCatInfo contain
340 * information on suggested volume, but this may not be the
341 * same as what is actually mounted.
343 * When we return with success, the correct tape may or may not
344 * actually be mounted. The calling routine must read it and
347 bool dir_ask_sysop_to_create_appendable_volume(DCR *dcr)
/*
 * Visible flow: on each pass the routine checks for job cancellation,
 * (except on the first pass) asks the Director for a suggested volume,
 * posts an M_MOUNT job message, reports JS_WaitMount or JS_WaitMedia to
 * the Director, and then blocks in wait_for_sysop().  The wait result is
 * examined: ETIMEDOUT extends the wait via double_dev_wait_time() until
 * that fails, EINVAL is fatal.  Once a volume name is available the job
 * status is set back to JS_Running and sent to the Director.
 *
 * NOTE(review): the loop construct, several conditions and most return
 * statements are not visible in this view.
 */
352 DEVICE *dev = dcr->dev;
355 Dmsg0(400, "enter dir_ask_sysop_to_create_appendable_volume\n");
/* Caller contract: the device must already be blocked. */
356 ASSERT(dev->dev_blocked);
358 if (job_canceled(jcr)) {
360 _("Job %s canceled while waiting for mount on Storage Device \"%s\".\n"),
361 jcr->Job, dcr->dev_name);
362 Jmsg(jcr, M_INFO, 0, "%s", dev->errmsg);
365 /* First pass, we *know* there are no appendable volumes, so no need to call */
366 if (!first && dir_find_next_appendable_volume(dcr)) { /* get suggested volume */
367 unmounted = (dev->dev_blocked == BST_UNMOUNTED) ||
368 (dev->dev_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
370 * If we have a valid volume name and we are not
371 * removable media, return now, or if we have a
372 * Slot for an autochanger, otherwise wait
373 * for the operator to mount the media.
375 if (!unmounted && ((dcr->VolumeName[0] && !dev_cap(dev, CAP_REM) &&
376 dev_cap(dev, CAP_LABEL)) ||
377 (dcr->VolumeName[0] && dcr->VolCatInfo.Slot))) {
378 Dmsg0(400, "Return 1 from mount without wait.\n");
381 jstat = JS_WaitMount;
383 Jmsg(jcr, M_MOUNT, 0, _(
384 "Please mount Volume \"%s\" on Storage Device \"%s\" for Job %s\n"
385 "Use \"mount\" command to release Job.\n"),
386 dcr->VolumeName, dcr->dev_name, jcr->Job);
387 Dmsg3(400, "Mount %s on %s for Job %s\n",
388 dcr->VolumeName, dcr->dev_name, jcr->Job);
/* No suggested volume: the operator must label a new one. */
391 jstat = JS_WaitMedia;
393 Jmsg(jcr, M_MOUNT, 0, _(
394 "Job %s waiting. Cannot find any appendable volumes.\n"
395 "Please use the \"label\" command to create a new Volume for:\n"
407 jcr->JobStatus = jstat;
/* Tell the Director why the job is stalled before blocking. */
408 dir_send_job_status(jcr);
410 stat = wait_for_sysop(dcr);
412 Dmsg1(400, "Poll timeout in create append vol on device %s\n", dev_name(dev));
416 if (stat == ETIMEDOUT) {
/* double_dev_wait_time() returning false ends the retries. */
417 if (!double_dev_wait_time(dev)) {
418 Mmsg(dev->errmsg, _("Max time exceeded waiting to mount Storage Device \"%s\" for Job %s\n"),
419 dev_name(dev), jcr->Job);
420 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
421 Dmsg1(400, "Gave up waiting on device %s\n", dev_name(dev));
422 return false; /* exceeded maximum waits */
426 if (stat == EINVAL) {
428 Mmsg2(dev->errmsg, _("pthread error in mount_next_volume stat=%d ERR=%s\n"),
429 stat, be.strerror(stat));
430 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
/*
 * NOTE(review): the condition for the message below is not visible.
 * It is issued as M_WARNING here, whereas the corresponding message in
 * dir_ask_sysop_to_mount_volume() is M_FATAL -- confirm which severity
 * is intended.
 */
435 Jmsg(jcr, M_WARNING, 0, _("pthread error in mount_next_volume stat=%d ERR=%s\n"), stat,
438 Dmsg1(400, "Someone woke me for device %s\n", dev_name(dev));
440 /* If no VolumeName, and cannot get one, try again */
441 if (dcr->VolumeName[0] == 0 && !job_canceled(jcr) &&
442 !dir_find_next_appendable_volume(dcr)) {
443 Jmsg(jcr, M_MOUNT, 0, _(
444 "Someone woke me up, but I cannot find any appendable\n"
445 "volumes for Job=%s.\n"), jcr->Job);
446 /* Restart wait counters after user interaction */
447 init_dev_wait_timers(dev);
450 unmounted = (dev->dev_blocked == BST_UNMOUNTED) ||
451 (dev->dev_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
453 continue; /* continue to wait */
457 * Device mounted, we have a volume, break and return
461 set_jcr_job_status(jcr, JS_Running);
462 dir_send_job_status(jcr);
463 Dmsg0(400, "leave dir_ask_sysop_to_mount_create_appendable_volume\n");
468 * Request to mount specific Volume
470 * Entered with device blocked and dcr->VolumeName is desired
472 * Leaves with device blocked.
474 * Returns: true on success (operator issues a mount command)
476 * Note, must create dev->errmsg on error return.
479 bool dir_ask_sysop_to_mount_volume(DCR *dcr)
/*
 * Visible flow: rejects an empty dcr->VolumeName, checks for job
 * cancellation, posts an M_MOUNT job message naming the volume and
 * device, reports JS_WaitMount to the Director and blocks in
 * wait_for_sysop().  ETIMEDOUT extends the wait through
 * double_dev_wait_time() until that fails; EINVAL is fatal.  On wake-up
 * the job status is set back to JS_Running and sent to the Director.
 *
 * NOTE(review): the loop construct, several conditions and most return
 * statements are not visible in this view.
 */
483 DEVICE *dev = dcr->dev;
486 Dmsg0(400, "enter dir_ask_sysop_to_mount_volume\n");
487 if (!dcr->VolumeName[0]) {
488 Mmsg0(dev->errmsg, _("Cannot request another volume: no volume name given.\n"));
/* Caller contract: the device must already be blocked. */
491 ASSERT(dev->dev_blocked);
493 if (job_canceled(jcr)) {
494 Mmsg(dev->errmsg, _("Job %s canceled while waiting for mount on Storage Device \"%s\".\n"),
495 jcr->Job, dcr->dev_name);
500 msg = _("Please mount");
501 Jmsg(jcr, M_MOUNT, 0, _("%s Volume \"%s\" on Storage Device \"%s\" for Job %s\n"),
502 msg, dcr->VolumeName, dcr->dev_name, jcr->Job);
503 Dmsg3(400, "Mount \"%s\" on device \"%s\" for Job %s\n",
504 dcr->VolumeName, dcr->dev_name, jcr->Job);
507 jcr->JobStatus = JS_WaitMount;
/* Tell the Director why the job is stalled before blocking. */
508 dir_send_job_status(jcr);
/* NOTE(review): the second ';' on the next line is a harmless empty statement. */
510 stat = wait_for_sysop(dcr); ; /* wait on device */
512 Dmsg1(400, "Poll timeout in mount vol on device %s\n", dev_name(dev));
513 Dmsg1(400, "Blocked=%s\n", edit_blocked_reason(dev));
517 if (stat == ETIMEDOUT) {
/* double_dev_wait_time() returning false ends the retries. */
518 if (!double_dev_wait_time(dev)) {
519 Mmsg(dev->errmsg, _("Max time exceeded waiting to mount Storage Device \"%s\" for Job %s\n"),
520 dev_name(dev), jcr->Job);
521 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
522 Dmsg1(400, "Gave up waiting on device %s\n", dev_name(dev));
523 return false; /* exceeded maximum waits */
527 if (stat == EINVAL) {
529 Mmsg2(dev->errmsg, _("pthread error in mount_volume stat=%d ERR=%s\n"),
530 stat, be.strerror(stat));
531 Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg);
/*
 * NOTE(review): the message below names "mount_next_volume" although
 * the EINVAL message just above uses "mount_volume" -- looks like a
 * copied string; confirm before changing the runtime text.
 */
536 Jmsg(jcr, M_FATAL, 0, _("pthread error in mount_next_volume stat=%d ERR=%s\n"), stat,
539 Dmsg1(400, "Someone woke me for device %s\n", dev_name(dev));
542 set_jcr_job_status(jcr, JS_Running);
543 dir_send_job_status(jcr);
544 Dmsg0(400, "leave dir_ask_sysop_to_mount_volume\n");
549 * Wait for SysOp to mount a tape
551 static int wait_for_sysop(DCR *dcr)
/*
 * Blocks on the device condition variable (dev->wait_next_vol, with
 * dev->mutex) until it is signalled, an error occurs, the remaining
 * wait time is used up, a poll interval elapses, a mount request is
 * seen, or the job is canceled.  While waiting, the device is marked
 * BST_WAITING_FOR_SYSOP; the previous blocked state is saved in
 * dev->dev_prev_blocked and restored at the end.
 *
 * The wait is sliced so that the thread wakes at least every
 * me->heartbeat_interval seconds (when non-zero) to send
 * BNET_HEARTBEAT signals on the file and Director sockets.
 *
 * NOTE(review): the end of this function, including its return
 * statement, and several local declarations are outside this view.
 */
555 struct timespec timeout;
/* 0 guarantees the first heartbeat check inside the loop fires. */
556 time_t last_heartbeat = 0;
557 time_t first_start = time(NULL);
561 DEVICE *dev = dcr->dev;
565 unmounted = (dev->dev_blocked == BST_UNMOUNTED) ||
566 (dev->dev_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
570 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
571 * HB_TIME seconds and send a heartbeat to the FD and the Director
572 * to keep stateful firewalls from closing them down while waiting
575 add_wait = dev->rem_wait_sec;
576 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
577 add_wait = me->heartbeat_interval;
579 /* If the user did not unmount the tape and we are polling, ensure
580 * that we poll at the correct interval.
582 if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval) {
583 add_wait = dev->vol_poll_interval;
/* pthread_cond_timedwait() takes an absolute deadline: now + add_wait. */
585 gettimeofday(&tv, &tz);
586 timeout.tv_nsec = tv.tv_usec * 1000;
587 timeout.tv_sec = tv.tv_sec + add_wait;
590 dev->dev_prev_blocked = dev->dev_blocked;
591 dev->dev_blocked = BST_WAITING_FOR_SYSOP; /* indicate waiting for mount */
594 for ( ; !job_canceled(jcr); ) {
597 Dmsg3(400, "I'm going to sleep on device %s. HB=%d wait=%d\n", dev_name(dev),
598 (int)me->heartbeat_interval, dev->wait_sec);
600 /* Wait required time */
601 stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex, &timeout);
602 Dmsg1(400, "Wokeup from sleep on device stat=%d\n", stat);
/* Charge the elapsed slice against the remaining wait budget. */
605 dev->rem_wait_sec -= (now - start);
607 /* Note, this always triggers the first time. We want that. */
608 if (me->heartbeat_interval) {
609 if (now - last_heartbeat >= me->heartbeat_interval) {
610 /* send heartbeats */
611 if (jcr->file_bsock) {
612 bnet_sig(jcr->file_bsock, BNET_HEARTBEAT);
613 Dmsg0(400, "Send heartbeat to FD.\n");
615 if (jcr->dir_bsock) {
616 bnet_sig(jcr->dir_bsock, BNET_HEARTBEAT);
618 last_heartbeat = now;
623 * Check if user unmounted the device while we were waiting
625 unmounted = (dev->dev_blocked == BST_UNMOUNTED) ||
626 (dev->dev_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
/* Any result other than a timeout (signalled, or an error code) ends the wait. */
628 if (stat != ETIMEDOUT) { /* we blocked the device */
629 break; /* on error return */
631 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
632 Dmsg0(400, "Exceed wait time.\n");
636 if (!unmounted && dev->vol_poll_interval &&
637 (now - first_start >= dev->vol_poll_interval)) {
638 Dmsg1(400, "In wait blocked=%s\n", edit_blocked_reason(dev));
639 dev->poll = true; /* returning a poll event */
643 * Check if user mounted the device while we were waiting
645 if (dev->dev_blocked == BST_MOUNT) { /* mount request ? */
/*
 * NOTE(review): the next slice is computed from dev->wait_sec, whereas
 * the initial slice and the budget check above use dev->rem_wait_sec --
 * confirm this difference is intended.
 */
650 add_wait = dev->wait_sec - (now - start);
654 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
655 add_wait = me->heartbeat_interval;
/* Re-arm the absolute deadline for the next pass of the loop. */
657 gettimeofday(&tv, &tz);
658 timeout.tv_nsec = tv.tv_usec * 1000;
659 timeout.tv_sec = tv.tv_sec + add_wait; /* additional wait */
660 Dmsg1(400, "Additional wait %d sec.\n", add_wait);
664 dev->dev_blocked = dev->dev_prev_blocked; /* restore entry state */