2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Drive reservation functions for Storage Daemon
33 * Split from job.c and acquire.c June 2005
42 #define jid() ((int)get_jobid_from_tid())
44 const int dbglvl = 50;
46 static dlist *vol_list = NULL;
47 static brwlock_t reservation_lock;
48 static brwlock_t vol_list_lock;
50 /* Forward referenced functions */
51 static int can_reserve_drive(DCR *dcr, RCTX &rctx);
52 static int reserve_device(RCTX &rctx);
53 static bool reserve_device_for_read(DCR *dcr);
54 static bool reserve_device_for_append(DCR *dcr, RCTX &rctx);
55 static bool use_storage_cmd(JCR *jcr);
56 static void queue_reserve_message(JCR *jcr);
58 /* Requests from the Director daemon */
59 static char use_storage[] = "use storage=%127s media_type=%127s "
60 "pool_name=%127s pool_type=%127s append=%d copy=%d stripe=%d\n";
61 static char use_device[] = "use device=%127s\n";
63 /* Responses sent to Director daemon */
64 static char OK_device[] = "3000 OK use device device=%s\n";
65 static char NO_device[] = "3924 Device \"%s\" not in SD Device resources.\n";
66 static char BAD_use[] = "3913 Bad use command: %s\n";
68 bool use_cmd(JCR *jcr)
71 * Get the device, media, and pool information
73 if (!use_storage_cmd(jcr)) {
74 set_jcr_job_status(jcr, JS_ErrorTerminated);
75 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
81 static int my_compare(void *item1, void *item2)
83 return strcmp(((VOLRES *)item1)->vol_name, ((VOLRES *)item2)->vol_name);
87 void init_reservations_lock()
90 if ((errstat=rwl_init(&reservation_lock)) != 0) {
92 Emsg1(M_ABORT, 0, _("Unable to initialize reservation lock. ERR=%s\n"),
93 be.bstrerror(errstat));
96 if ((errstat=rwl_init(&vol_list_lock)) != 0) {
98 Emsg1(M_ABORT, 0, _("Unable to initialize volume list lock. ERR=%s\n"),
99 be.bstrerror(errstat));
103 void term_reservations_lock()
105 rwl_destroy(&reservation_lock);
106 rwl_destroy(&vol_list_lock);
109 int reservations_lock_count = 0;
111 /* This applies to a drive and to Volumes */
112 void _lock_reservations()
115 reservations_lock_count++;
116 if ((errstat=rwl_writelock(&reservation_lock)) != 0) {
118 Emsg2(M_ABORT, 0, "rwl_writelock failure. stat=%d: ERR=%s\n",
119 errstat, be.bstrerror(errstat));
123 void _unlock_reservations()
126 reservations_lock_count--;
127 if ((errstat=rwl_writeunlock(&reservation_lock)) != 0) {
129 Emsg2(M_ABORT, 0, "rwl_writeunlock failure. stat=%d: ERR=%s\n",
130 errstat, be.bstrerror(errstat));
134 int vol_list_lock_count = 0;
139 vol_list_lock_count++;
140 if ((errstat=rwl_writelock(&vol_list_lock)) != 0) {
142 Emsg2(M_ABORT, 0, "rwl_writelock failure. stat=%d: ERR=%s\n",
143 errstat, be.bstrerror(errstat));
147 void _unlock_volumes()
150 vol_list_lock_count--;
151 if ((errstat=rwl_writeunlock(&vol_list_lock)) != 0) {
153 Emsg2(M_ABORT, 0, "rwl_writeunlock failure. stat=%d: ERR=%s\n",
154 errstat, be.bstrerror(errstat));
160 * List Volumes -- this should be moved to status.c
167 static void debug_list_volumes(const char *imsg)
170 POOL_MEM msg(PM_MESSAGE);
173 foreach_dlist(vol, vol_list) {
175 Mmsg(msg, "List from %s: %s at %p on device %s\n", imsg,
176 vol->vol_name, vol->vol_name, vol->dev->print_name());
178 Mmsg(msg, "List from %s: %s at %p no dev\n", imsg, vol->vol_name, vol->vol_name);
180 Dmsg2(dbglvl, "jid=%u %s", jid(), msg.c_str());
185 foreach_dlist(vol, vol_list) {
186 if (vol->dev == dev) {
187 Dmsg0(000, "Two Volumes on same device.\n");
194 // Dmsg2(dbglvl, "List from %s: %d volumes\n", imsg, count);
200 * List Volumes -- this should be moved to status.c
202 void list_volumes(void sendit(const char *msg, int len, void *sarg), void *arg)
205 POOL_MEM msg(PM_MESSAGE);
209 foreach_dlist(vol, vol_list) {
211 len = Mmsg(msg, "%s on device %s\n", vol->vol_name, vol->dev->print_name());
212 sendit(msg.c_str(), len, arg);
214 len = Mmsg(msg, "%s no dev\n", vol->vol_name);
215 sendit(msg.c_str(), len, arg);
222 * Create a Volume item to put in the Volume list
223 * Ensure that the device points to it.
225 static VOLRES *new_vol_item(DCR *dcr, const char *VolumeName)
228 vol = (VOLRES *)malloc(sizeof(VOLRES));
229 memset(vol, 0, sizeof(VOLRES));
230 vol->vol_name = bstrdup(VolumeName);
232 Dmsg4(dbglvl, "jid=%u new Vol=%s at %p dev=%s\n", (int)dcr->jcr->JobId,
233 VolumeName, vol->vol_name, vol->dev->print_name());
237 static void free_vol_item(VOLRES *vol)
241 vol->dev->vol = NULL;
248 * Put a new Volume entry in the Volume list. This
249 * effectively reserves the volume so that it will
250 * not be mounted again.
252 * If the device has any current volume associated with it,
253 * and it is a different Volume, and the device is not busy,
254 * we release the old Volume item and insert the new one.
256 * It is assumed that the device is free and locked so that
257 * we can change the device structure.
259 * Some details of the Volume list handling:
261 * 1. The Volume list entry must be attached to the drive (rather than
262 * attached to a job as it currently is). I.e. the drive that "owns"
263 * the volume (reserved, in use, mounted)
264 * must point to the volume (still to be maintained in a list).
266 * 2. The Volume is entered in the list when a drive is reserved.
268 * 3. When a drive is in use, the device code must appropriately update the
269 * volume name as it changes (currently the list is static -- an entry is
270 * removed when the Volume is no longer reserved, in use or mounted).
271 * The new code must keep the same list entry as long as the drive
272 * has any volume associated with it but the volume name in the list
273 * must be updated when the drive has a different volume mounted.
275 * 4. A job that has reserved a volume, can un-reserve the volume, and if the
276 * volume is not mounted, and not reserved, and not in use, it will be
277 * removed from the list.
279 * 5. If a job wants to reserve a drive with a different Volume from the one on
280 * the drive, it can re-use the drive for the new Volume.
282 * 6. If a job wants a Volume that is in a different drive, it can either use the
283 * other drive or take the volume, only if the other drive is not in use or
286 * One nice aspect of this is that the reserve use count and the writer use count
287 * already exist and are correctly programmed and will need no changes -- use
288 * counts are always very tricky.
290 * The old code had a concept of "reserving" a Volume, but it needs to be changed
291 * to reserving and using a drive. A volume must be attached to (owned by) a
292 * drive and can move from drive to drive or be unused given certain specific
293 * conditions of the drive. The key is that the drive must "own" the Volume.
294 * The old code has the job (dcr) owning the volume (more or less). The job is
295 * to change the insertion and removal of the volumes from the list to be based
296 * on the drive rather than the job.
298 * Return: VOLRES entry on success
299 * NULL volume busy on another drive
301 VOLRES *reserve_volume(DCR *dcr, const char *VolumeName)
304 DEVICE *dev = dcr->dev;
308 Dmsg2(dbglvl, "jid=%u reserve_volume %s\n", jid(), VolumeName);
310 * We lock the reservations system here to ensure
311 * when adding a new volume that no newly scheduled
312 * job can reserve it.
315 debug_list_volumes("begin reserve_volume");
317 * First, remove any old volume attached to this device as it
323 * Make sure we don't remove the current volume we are inserting
324 * because it was probably inserted by another job.
326 if (strcmp(vol->vol_name, VolumeName) == 0) {
327 goto get_out; /* Volume already on this device */
329 Dmsg3(dbglvl, "jid=%u reserve_vol free vol=%s at %p\n",
330 (int)dcr->jcr->JobId, vol->vol_name, vol->vol_name);
331 debug_list_volumes("reserve_vol free");
332 vol_list->remove(vol);
337 /* Create a new Volume entry */
338 nvol = new_vol_item(dcr, VolumeName);
341 * Now try to insert the new Volume
343 vol = (VOLRES *)vol_list->binary_insert(nvol, my_compare);
345 Dmsg3(dbglvl, "jid=%u Found vol=%s dev-same=%d\n", jid(), vol->vol_name, dev==vol->dev);
347 * At this point, a Volume with this name already is in the list,
348 * so we simply release our new Volume entry. Note, this should
349 * only happen if we are moving the volume from one drive to another.
351 Dmsg3(dbglvl, "jid=%u reserve_vol free-tmp vol=%s at %p\n",
352 (int)dcr->jcr->JobId, vol->vol_name, vol->vol_name);
354 * Clear dev pointer so that free_vol_item() doesn't
355 * take away our volume.
357 nvol->dev = NULL; /* don't zap dev entry */
360 /* Check if we are trying to use the Volume on a different drive */
361 if (dev != vol->dev) {
362 /* Caller wants to switch Volume to another device */
363 if (!vol->dev->is_busy()) {
364 /* OK to move it -- I'm not sure this will work */
365 Dmsg4(dbglvl, "==== jid=%u Swap vol=%s from dev=%s to %s\n", jid(), VolumeName,
366 vol->dev->print_name(), dev->print_name());
367 vol->dev->vol = NULL; /* take vol from old drive */
368 vol->dev->VolHdr.VolumeName[0] = 0;
369 vol->dev = dev; /* point vol at new drive */
370 dev->vol = vol; /* point dev at vol */
371 dev->VolHdr.VolumeName[0] = 0;
373 Dmsg4(dbglvl, "jid=%u Volume busy could not swap vol=%s from dev=%s to %s\n",
374 jid(), VolumeName, vol->dev->print_name(), dev->print_name());
375 vol = NULL; /* device busy */
382 debug_list_volumes("end new volume");
388 * Search for a Volume name in the Volume list.
390 * Returns: VOLRES entry on success
391 * NULL if the Volume is not in the list
393 VOLRES *find_volume(DCR *dcr)
396 /* Do not lock reservations here */
398 vol.vol_name = bstrdup(dcr->VolumeName);
399 fvol = (VOLRES *)vol_list->binary_search(&vol, my_compare);
401 Dmsg3(dbglvl, "jid=%u find_vol=%s found=%d\n", jid(), dcr->VolumeName, fvol!=NULL);
402 debug_list_volumes("find_volume");
408 * Remove any reservation from a drive and tell the system
409 * that the volume is unused at least by us.
411 void unreserve_device(DCR *dcr)
413 DEVICE *dev = dcr->dev;
415 if (dcr->reserved_device) {
416 dcr->reserved_device = false;
417 dev->reserved_device--;
418 Dmsg3(dbglvl, "jid=%u Dec reserve=%d dev=%s\n", jid(), dev->reserved_device, dev->print_name());
419 dcr->reserved_device = false;
420 /* If we set read mode in reserving, remove it */
421 if (dev->can_read()) {
424 if (dev->num_writers < 0) {
425 Jmsg1(dcr->jcr, M_ERROR, 0, _("Hey! num_writers=%d!!!!\n"), dev->num_writers);
426 dev->num_writers = 0;
435 * Free a Volume from the Volume list if it is no longer used
437 * Returns: true if the Volume is found and removed from the list
438 * false if the Volume is not in the list or is in use
440 bool volume_unused(DCR *dcr)
442 DEVICE *dev = dcr->dev;
444 if (dev->vol == NULL) {
445 Dmsg2(dbglvl, "jid=%u vol_unused: no vol on %s\n", (int)dcr->jcr->JobId, dev->print_name());
446 debug_list_volumes("null vol cannot unreserve_volume");
450 if (dev->is_busy()) {
451 Dmsg2(dbglvl, "jid=%u vol_unused: no vol on %s\n", (int)dcr->jcr->JobId, dev->print_name());
452 debug_list_volumes("dev busy cannot unreserve_volume");
456 return free_volume(dev);
460 * Unconditionally release the volume
462 bool free_volume(DEVICE *dev)
466 if (dev->vol == NULL) {
467 Dmsg2(dbglvl, "jid=%u No vol on dev %s\n", jid(), dev->print_name());
473 vol_list->remove(vol);
474 Dmsg3(dbglvl, "jid=%u free_volume %s dev=%s\n", jid(), vol->vol_name, dev->print_name());
476 debug_list_volumes("free_volume");
482 /* Create the Volume list */
483 void create_volume_list()
486 if (vol_list == NULL) {
487 vol_list = New(dlist(vol, &vol->link));
491 /* Release all Volumes from the list */
492 void free_volume_list()
499 foreach_dlist(vol, vol_list) {
500 Dmsg3(dbglvl, "jid=%u Unreleased Volume=%s dev=%p\n", jid(), vol->vol_name, vol->dev);
502 vol->vol_name = NULL;
509 bool is_volume_in_use(DCR *dcr)
511 VOLRES *vol = find_volume(dcr);
513 Dmsg2(dbglvl, "jid=%u Vol=%s not in use.\n", jid(), dcr->VolumeName);
514 return false; /* vol not in list */
516 ASSERT(vol->dev != NULL);
518 if (dcr->dev == vol->dev) { /* same device OK */
519 Dmsg2(dbglvl, "jid=%u Vol=%s on same dev.\n", jid(), dcr->VolumeName);
522 Dmsg4(dbglvl, "jid=%u Vol=%s on %s we have %s\n", jid(), dcr->VolumeName,
523 vol->dev->print_name(), dcr->dev->print_name());
525 if (!vol->dev->is_busy()) {
526 Dmsg3(dbglvl, "jid=%u Vol=%s dev=%s not busy.\n", jid(), dcr->VolumeName, vol->dev->print_name());
529 Dmsg3(dbglvl, "jid=%u Vol=%s dev=%s busy.\n", jid(), dcr->VolumeName, vol->dev->print_name());
531 Dmsg3(dbglvl, "jid=%u Vol=%s in use by %s.\n", jid(), dcr->VolumeName, vol->dev->print_name());
537 * We get the following type of information:
539 * use storage=xxx media_type=yyy pool_name=xxx pool_type=yyy append=1 copy=0 stripe=0
543 * use storage=xxx media_type=yyy pool_name=xxx pool_type=yyy append=0 copy=0 stripe=0
547 static bool use_storage_cmd(JCR *jcr)
549 POOL_MEM store_name, dev_name, media_type, pool_name, pool_type;
550 BSOCK *dir = jcr->dir_bsock;
560 memset(&rctx, 0, sizeof(RCTX));
563 * If there are multiple devices, the director sends us
564 * use_device for each device that it wants to use.
566 dirstore = New(alist(10, not_owned_by_alist));
567 msgs = jcr->reserve_msgs = New(alist(10, not_owned_by_alist));
569 Dmsg2(dbglvl, "jid=%u <dird: %s", jid(), dir->msg);
570 ok = sscanf(dir->msg, use_storage, store_name.c_str(),
571 media_type.c_str(), pool_name.c_str(),
572 pool_type.c_str(), &append, &Copy, &Stripe) == 7;
577 jcr->write_store = dirstore;
579 jcr->read_store = dirstore;
581 rctx.append = append;
582 unbash_spaces(store_name);
583 unbash_spaces(media_type);
584 unbash_spaces(pool_name);
585 unbash_spaces(pool_type);
586 store = new DIRSTORE;
587 dirstore->append(store);
588 memset(store, 0, sizeof(DIRSTORE));
589 store->device = New(alist(10));
590 bstrncpy(store->name, store_name, sizeof(store->name));
591 bstrncpy(store->media_type, media_type, sizeof(store->media_type));
592 bstrncpy(store->pool_name, pool_name, sizeof(store->pool_name));
593 bstrncpy(store->pool_type, pool_type, sizeof(store->pool_type));
594 store->append = append;
596 /* Now get all devices */
597 while (dir->recv() >= 0) {
598 Dmsg2(dbglvl, "jid=%u <dird device: %s", jid(), dir->msg);
599 ok = sscanf(dir->msg, use_device, dev_name.c_str()) == 1;
603 unbash_spaces(dev_name);
604 store->device->append(bstrdup(dev_name.c_str()));
606 } while (ok && dir->recv() >= 0);
608 /* Developer debug code */
610 if (debug_level >= dbglvl) {
611 foreach_alist(store, dirstore) {
612 Dmsg6(dbglvl, "jid=%u Storage=%s media_type=%s pool=%s pool_type=%s append=%d\n",
613 (int)rctx.jcr->JobId,
614 store->name, store->media_type, store->pool_name,
615 store->pool_type, store->append);
616 foreach_alist(device_name, store->device) {
617 Dmsg2(dbglvl, "jid=%u Device=%s\n", jid(), device_name);
622 init_jcr_device_wait_timers(jcr);
624 * At this point, we have a list of all the Director's Storage
625 * resources indicated for this Job, which include Pool, PoolType,
626 * storage name, and Media type.
627 * Then for each of the Storage resources, we have a list of
628 * device names that were given.
630 * Wiffle through them and find one that can do the backup.
633 int wait_for_device_retries = 0;
636 rctx.notify_dir = true;
638 for ( ; !fail && !job_canceled(jcr); ) {
639 while ((msg = (char *)msgs->pop())) {
642 rctx.suitable_device = false;
643 rctx.have_volume = false;
644 rctx.VolumeName[0] = 0;
645 rctx.any_drive = false;
646 if (!jcr->PreferMountedVols) {
647 /* Look for unused drives in autochangers */
648 rctx.num_writers = 20000000; /* start with impossible number */
649 rctx.low_use_drive = NULL;
650 rctx.PreferMountedVols = false;
651 rctx.exact_match = false;
652 rctx.autochanger_only = true;
653 Dmsg6(dbglvl, "jid=%u PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
654 (int)rctx.jcr->JobId,
655 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
656 rctx.autochanger_only, rctx.any_drive);
657 if ((ok = find_suitable_device_for_job(jcr, rctx))) {
660 /* Look through all drives possibly for low_use drive */
661 if (rctx.low_use_drive) {
662 rctx.try_low_use_drive = true;
663 if ((ok = find_suitable_device_for_job(jcr, rctx))) {
666 rctx.try_low_use_drive = false;
668 rctx.autochanger_only = false;
669 Dmsg6(dbglvl, "jid=%u PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
670 (int)rctx.jcr->JobId,
671 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
672 rctx.autochanger_only, rctx.any_drive);
673 if ((ok = find_suitable_device_for_job(jcr, rctx))) {
677 /* Look for an exact match all drives */
678 rctx.PreferMountedVols = true;
679 rctx.exact_match = true;
680 rctx.autochanger_only = false;
681 Dmsg6(dbglvl, "jid=%u PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
682 (int)rctx.jcr->JobId,
683 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
684 rctx.autochanger_only, rctx.any_drive);
685 if ((ok = find_suitable_device_for_job(jcr, rctx))) {
688 /* Look for any mounted drive */
689 rctx.exact_match = false;
690 Dmsg6(dbglvl, "jid=%u PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
691 (int)rctx.jcr->JobId,
692 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
693 rctx.autochanger_only, rctx.any_drive);
694 if ((ok = find_suitable_device_for_job(jcr, rctx))) {
698 rctx.any_drive = true;
699 Dmsg6(dbglvl, "jid=%u PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
700 (int)rctx.jcr->JobId,
701 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
702 rctx.autochanger_only, rctx.any_drive);
703 if ((ok = find_suitable_device_for_job(jcr, rctx))) {
706 /* Keep reservations locked *except* during wait_for_device() */
707 unlock_reservations();
709 * The idea of looping on repeat a few times is to ensure
710 * that if there is some subtle timing problem between two
711 * jobs, we will simply try again, and most likely succeed.
712 * This can happen if one job reserves a drive or finishes using
713 * a drive at the same time a second job wants it.
715 if (repeat++ > 1) { /* try algorithm 3 times */
716 bmicrosleep(30, 0); /* wait a bit */
717 Dmsg1(dbglvl, "jid=%u repeat reserve algorithm\n", (int)rctx.jcr->JobId);
718 } else if (!rctx.suitable_device || !wait_for_device(jcr, wait_for_device_retries)) {
719 Dmsg1(dbglvl, "jid=%u Fail. !suitable_device || !wait_for_device\n",
720 (int)rctx.jcr->JobId);
724 dir->signal(BNET_HEARTBEAT); /* Inform Dir that we are alive */
726 unlock_reservations();
729 * If we get here, there are no suitable devices available, which
730 * means nothing configured. If a device is suitable but busy
731 * with another Volume, we will not come here.
733 unbash_spaces(dir->msg);
734 pm_strcpy(jcr->errmsg, dir->msg);
735 Jmsg(jcr, M_INFO, 0, _("Failed command: %s\n"), jcr->errmsg);
736 Jmsg(jcr, M_FATAL, 0, _("\n"
737 " Device \"%s\" with MediaType \"%s\" requested by DIR not found in SD Device resources.\n"),
738 dev_name.c_str(), media_type.c_str());
739 dir->fsend(NO_device, dev_name.c_str());
741 Dmsg2(dbglvl, "jid=%u >dird: %s", jid(), dir->msg);
744 unbash_spaces(dir->msg);
745 pm_strcpy(jcr->errmsg, dir->msg);
746 Jmsg(jcr, M_FATAL, 0, _("Failed command: %s\n"), jcr->errmsg);
747 dir->fsend(BAD_use, jcr->errmsg);
748 Dmsg2(dbglvl, "jid=%u >dird: %s", jid(), dir->msg);
755 void release_msgs(JCR *jcr)
757 alist *msgs = jcr->reserve_msgs;
764 while ((msg = (char *)msgs->pop())) {
768 jcr->reserve_msgs = NULL;
769 unlock_reservations();
773 * Search for a device suitable for this job.
775 bool find_suitable_device_for_job(JCR *jcr, RCTX &rctx)
783 dirstore = jcr->write_store;
785 dirstore = jcr->read_store;
787 Dmsg5(dbglvl, "jid=%u PrefMnt=%d exact=%d suitable=%d chgronly=%d\n",
788 (int)rctx.jcr->JobId,
789 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
790 rctx.autochanger_only);
792 if (!vol_list->empty() && rctx.append && rctx.PreferMountedVols) {
793 dlist *temp_vol_list, *save_vol_list;
798 * Create a temporary copy of the volume list. We do this,
799 * to avoid having the volume list locked during the
800 * call to reserve_device(), which would cause a deadlock.
801 * Note, we may want to add an update counter on the vol_list
802 * so that if it is modified while we are traversing the copy
803 * we can take note and act accordingly (probably redo the
804 * search at least a few times).
806 Dmsg1(dbglvl, "jid=%u duplicate vol list\n", (int)rctx.jcr->JobId);
807 temp_vol_list = New(dlist(vol, &vol->link));
808 foreach_dlist(vol, vol_list) {
810 VOLRES *tvol = (VOLRES *)malloc(sizeof(VOLRES));
811 memset(tvol, 0, sizeof(VOLRES));
812 tvol->vol_name = bstrdup(vol->vol_name);
813 tvol->dev = vol->dev;
814 nvol = (VOLRES *)temp_vol_list->binary_insert(tvol, my_compare);
816 tvol->dev = NULL; /* don't zap dev entry */
818 Pmsg0(000, "Logic error. Duplicating vol list hit duplicate.\n");
819 Jmsg(jcr, M_WARNING, 0, "Logic error. Duplicating vol list hit duplicate.\n");
824 /* Look through reserved volumes for one we can use */
825 foreach_dlist(vol, temp_vol_list) {
829 foreach_alist(store, dirstore) {
831 foreach_alist(device_name, store->device) {
833 if (strcmp(device_name, vol->dev->device->hdr.name) != 0) {
836 rctx.device_name = device_name;
837 rctx.device = vol->dev->device;
838 bstrncpy(rctx.VolumeName, vol->vol_name, sizeof(rctx.VolumeName));
839 rctx.have_volume = true;
840 /* Try reserving this device and volume */
841 Dmsg3(dbglvl, "jid=%u try vol=%s on device=%s\n", (int)rctx.jcr->JobId,
842 rctx.VolumeName, device_name);
843 stat = reserve_device(rctx);
844 if (stat == 1) { /* found available device */
845 Dmsg2(dbglvl, "jid=%u Suitable device found=%s\n", (int)rctx.jcr->JobId,
849 } else if (stat == 0) { /* device busy */
850 Dmsg2(dbglvl, "jid=%u Suitable device=%s, busy: not use\n",
851 (int)rctx.jcr->JobId, device_name);
853 /* otherwise error */
854 Dmsg1(dbglvl, "jid=%u No suitable device found.\n", (int)rctx.jcr->JobId);
856 rctx.have_volume = false;
862 } /* end for loop over reserved volumes */
864 save_vol_list = vol_list;
865 vol_list = temp_vol_list;
866 free_volume_list(); /* release temp_vol_list */
867 vol_list = save_vol_list;
868 Dmsg1(dbglvl, "jid=%u deleted temp vol list\n", (int)rctx.jcr->JobId);
872 Dmsg2(dbglvl, "jid=%u got vol %s in reserved volums list\n", (int)rctx.jcr->JobId,
878 * No reserved volume we can use, so now search for an available device.
880 * For each storage device that the user specified, we
881 * search and see if there is a resource for that device.
883 foreach_alist(store, dirstore) {
885 foreach_alist(device_name, store->device) {
887 rctx.device_name = device_name;
888 stat = search_res_for_device(rctx);
889 if (stat == 1) { /* found available device */
890 Dmsg2(dbglvl, "jid=%u available device found=%s\n", (int)rctx.jcr->JobId,
894 } else if (stat == 0) { /* device busy */
895 Dmsg2(dbglvl, "jid=%u Suitable device=%s, busy: not use\n",
896 (int)rctx.jcr->JobId, device_name);
898 /* otherwise error */
899 Dmsg1(dbglvl, "jid=%u No suitable device found.\n", (int)rctx.jcr->JobId);
910 * Search for a particular storage device with particular storage
911 * characteristics (MediaType).
913 int search_res_for_device(RCTX &rctx)
915 AUTOCHANGER *changer;
918 Dmsg2(dbglvl, "jid=%u search res for %s\n", (int)rctx.jcr->JobId, rctx.device_name);
919 /* Look through Autochangers first */
920 foreach_res(changer, R_AUTOCHANGER) {
921 Dmsg2(dbglvl, "jid=%u Try match changer res=%s\n", (int)rctx.jcr->JobId, changer->hdr.name);
922 /* Find resource, and make sure we were able to open it */
923 if (fnmatch(rctx.device_name, changer->hdr.name, 0) == 0) {
924 /* Try each device in this AutoChanger */
925 foreach_alist(rctx.device, changer->device) {
926 Dmsg2(dbglvl, "jid=%u Try changer device %s\n", (int)rctx.jcr->JobId,
927 rctx.device->hdr.name);
928 stat = reserve_device(rctx);
929 if (stat != 1) { /* try another device */
933 if (rctx.store->append == SD_APPEND) {
934 Dmsg3(dbglvl, "jid=%u Device %s reserved=%d for append.\n",
935 (int)rctx.jcr->JobId, rctx.device->hdr.name,
936 rctx.jcr->dcr->dev->reserved_device);
938 Dmsg3(dbglvl, "jid=%u Device %s reserved=%d for read.\n",
939 (int)rctx.jcr->JobId, rctx.device->hdr.name,
940 rctx.jcr->read_dcr->dev->reserved_device);
947 /* Now if requested look through regular devices */
948 if (!rctx.autochanger_only) {
949 foreach_res(rctx.device, R_DEVICE) {
950 Dmsg2(dbglvl, "jid=%u Try match res=%s\n", (int)rctx.jcr->JobId, rctx.device->hdr.name);
951 /* Find resource, and make sure we were able to open it */
952 if (fnmatch(rctx.device_name, rctx.device->hdr.name, 0) == 0) {
953 stat = reserve_device(rctx);
954 if (stat != 1) { /* try another device */
958 if (rctx.store->append == SD_APPEND) {
959 Dmsg3(dbglvl, "jid=%u Device %s reserved=%d for append.\n",
960 (int)rctx.jcr->JobId, rctx.device->hdr.name,
961 rctx.jcr->dcr->dev->reserved_device);
963 Dmsg3(dbglvl, "jid=%u Device %s reserved=%d for read.\n",
964 (int)rctx.jcr->JobId, rctx.device->hdr.name,
965 rctx.jcr->read_dcr->dev->reserved_device);
971 return -1; /* nothing found */
975 * Try to reserve a specific device.
977 * Returns: 1 -- OK, have DCR
981 static int reserve_device(RCTX &rctx)
985 const int name_len = MAX_NAME_LENGTH;
987 /* Make sure MediaType is OK */
988 Dmsg3(dbglvl, "jid=%u chk MediaType device=%s request=%s\n",
989 (int)rctx.jcr->JobId,
990 rctx.device->media_type, rctx.store->media_type);
991 if (strcmp(rctx.device->media_type, rctx.store->media_type) != 0) {
995 /* Make sure device exists -- i.e. we can stat() it */
996 if (!rctx.device->dev) {
997 rctx.device->dev = init_dev(rctx.jcr, rctx.device);
999 if (!rctx.device->dev) {
1000 if (rctx.device->changer_res) {
1001 Jmsg(rctx.jcr, M_WARNING, 0, _("\n"
1002 " Device \"%s\" in changer \"%s\" requested by DIR could not be opened or does not exist.\n"),
1003 rctx.device->hdr.name, rctx.device_name);
1005 Jmsg(rctx.jcr, M_WARNING, 0, _("\n"
1006 " Device \"%s\" requested by DIR could not be opened or does not exist.\n"),
1009 return -1; /* no use waiting */
1012 rctx.suitable_device = true;
1013 Dmsg2(dbglvl, "jid=%u try reserve %s\n", rctx.jcr->JobId, rctx.device->hdr.name);
1014 dcr = new_dcr(rctx.jcr, rctx.device->dev);
1016 BSOCK *dir = rctx.jcr->dir_bsock;
1017 dir->fsend(_("3926 Could not get dcr for device: %s\n"), rctx.device_name);
1018 Dmsg1(dbglvl, ">dird: %s", dir->msg);
1021 bstrncpy(dcr->pool_name, rctx.store->pool_name, name_len);
1022 bstrncpy(dcr->pool_type, rctx.store->pool_type, name_len);
1023 bstrncpy(dcr->media_type, rctx.store->media_type, name_len);
1024 bstrncpy(dcr->dev_name, rctx.device_name, name_len);
1025 if (rctx.store->append == SD_APPEND) {
1026 Dmsg3(dbglvl, "jid=%u have_vol=%d vol=%s\n", (int)rctx.jcr->JobId,
1027 rctx.have_volume, rctx.VolumeName);
1028 ok = reserve_device_for_append(dcr, rctx);
1033 rctx.jcr->dcr = dcr;
1034 Dmsg6(dbglvl, "jid=%u Reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n",
1035 (int)rctx.jcr->JobId,
1036 dcr->dev->reserved_device,
1037 dcr->dev_name, dcr->media_type, dcr->pool_name, ok);
1038 if (!rctx.have_volume) {
1039 dcr->any_volume = true;
1040 if (dir_find_next_appendable_volume(dcr)) {
1041 bstrncpy(rctx.VolumeName, dcr->VolumeName, sizeof(rctx.VolumeName));
1042 Dmsg2(dbglvl, "jid=%u looking for Volume=%s\n", (int)rctx.jcr->JobId, rctx.VolumeName);
1043 rctx.have_volume = true;
1045 Dmsg1(dbglvl, "jid=%u No next volume found\n", (int)rctx.jcr->JobId);
1046 rctx.have_volume = false;
1047 rctx.VolumeName[0] = 0;
1049 * If there is at least one volume that is valid and in use,
1050 * but we get here, check if we are running with a preference for
1051 * non-mounted drives. In that case, we have selected a
1052 * non-used drive and our one and only volume is mounted
1053 * elsewhere, so we bail out and retry using that drive.
1055 if (dcr->volume_in_use && !rctx.PreferMountedVols) {
1056 rctx.PreferMountedVols = true;
1062 ok = reserve_device_for_read(dcr);
1064 rctx.jcr->read_dcr = dcr;
1065 Dmsg6(dbglvl, "jid=%u Read reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n",
1066 (int)rctx.jcr->JobId,
1067 dcr->dev->reserved_device,
1068 dcr->dev_name, dcr->media_type, dcr->pool_name, ok);
1074 if (rctx.notify_dir) {
1076 BSOCK *dir = rctx.jcr->dir_bsock;
1077 pm_strcpy(dev_name, rctx.device->hdr.name);
1078 bash_spaces(dev_name);
1079 ok = dir->fsend(OK_device, dev_name.c_str()); /* Return real device name */
1080 Dmsg2(dbglvl, "jid=%u >dird changer: %s", jid(), dir->msg);
1087 rctx.have_volume = false;
1089 Dmsg1(dbglvl, "jid=%u Not OK.\n", (int)rctx.jcr->JobId);
1094 * We "reserve" the drive by setting the ST_READ bit. No one else
1095 * should touch the drive until that is cleared.
1096 * This allows the DIR to "reserve" the device before actually
1099 static bool reserve_device_for_read(DCR *dcr)
1101 DEVICE *dev = dcr->dev;
1102 JCR *jcr = dcr->jcr;
1109 if (is_device_unmounted(dev)) {
1110 Dmsg2(dbglvl, "jid=%u Device %s is BLOCKED due to user unmount.\n",
1111 (int)jcr->JobId, dev->print_name());
1112 Mmsg(jcr->errmsg, _("3601 JobId=%u device %s is BLOCKED due to user unmount.\n"),
1113 jcr->JobId, dev->print_name());
1114 queue_reserve_message(jcr);
1118 if (dev->is_busy()) {
1119 Dmsg5(dbglvl, "jid=%u Device %s is busy ST_READ=%d num_writers=%d reserved=%d.\n",
1120 (int)jcr->JobId, dev->print_name(),
1121 dev->state & ST_READ?1:0, dev->num_writers, dev->reserved_device);
1122 Mmsg(jcr->errmsg, _("3602 JobId=%u device %s is busy (already reading/writing).\n"),
1123 jcr->JobId, dev->print_name());
1124 queue_reserve_message(jcr);
1128 dev->clear_append();
1131 dev->reserved_device++;
1132 Dmsg4(dbglvl, "jid=%u Inc reserve=%d dev=%s %p\n", (int)jcr->JobId,
1133 dev->reserved_device, dev->print_name(), dev);
1134 dcr->reserved_device = true;
1143 * We reserve the device for appending by incrementing the
1144 * reserved_device. We do virtually all the same work that
1145 * is done in acquire_device_for_append(), but we do
1146 * not attempt to mount the device. This routine allows
1147 * the DIR to reserve multiple devices before *really*
1148 * starting the job. It also permits the SD to refuse
1149 * certain devices (not up, ...).
1151 * Note, in reserving a device, if the device is for the
1152 * same pool and the same pool type, then it is acceptable.
1153 * The Media Type has already been checked. If we are
1154 * the first to reserve the device, we put the pool
1155 * name and pool type in the device record.
/*
 * Reserve dcr->dev for appending on behalf of the job dcr->jcr.
 *
 * Steps visible in this excerpt:
 *  - refuse when the device can_read() (message 3603);
 *  - refuse when is_device_unmounted() reports true (message 3604);
 *  - refuse unless can_reserve_drive() yields exactly 1;
 *  - otherwise increment dev->reserved_device and mark the DCR as
 *    holding a reservation (dcr->reserved_device = true).
 * Each refusal message is formatted into jcr->errmsg and handed to
 *  queue_reserve_message().
 *
 * NOTE(review): the embedded line numbering has gaps inside this
 *  function, so some statements (remaining locals, the exit taken
 *  by each failing branch, and the return statements) are not
 *  visible here. The bool result presumably reports whether the
 *  reservation was made -- confirm against the complete file.
 */
1157 static bool reserve_device_for_append(DCR *dcr, RCTX &rctx)
1159 JCR *jcr = dcr->jcr;
1160 DEVICE *dev = dcr->dev;
1167 /* If device is being read, we cannot write it */
1168 if (dev->can_read()) {
1169 Mmsg(jcr->errmsg, _("3603 JobId=%u device %s is busy reading.\n"),
1170 jcr->JobId, dev->print_name());
/* jid() is the file-level macro wrapping get_jobid_from_tid() */
1171 Dmsg2(dbglvl, "jid=%u %s", jid(), jcr->errmsg);
1172 queue_reserve_message(jcr);
/* NOTE(review): the exit from this failing branch is not visible here */
1176 /* If device is unmounted, we are out of luck */
1177 if (is_device_unmounted(dev)) {
1178 Mmsg(jcr->errmsg, _("3604 JobId=%u device %s is BLOCKED due to user unmount.\n"),
1179 jcr->JobId, dev->print_name());
1180 Dmsg2(dbglvl, "jid=%u %s", jid(), jcr->errmsg);
1181 queue_reserve_message(jcr);
/* NOTE(review): the exit from this failing branch is not visible here */
1185 Dmsg2(dbglvl, "jid=%u reserve_append device is %s\n",
1186 (int)jcr->JobId, dev->print_name());
1188 /* Now do detailed tests ... */
/* Any result other than 1 (that is 0 = wait or -1 = error) is a refusal */
1189 if (can_reserve_drive(dcr, rctx) != 1) {
1190 Dmsg1(dbglvl, "jid=%u can_reserve_drive!=1\n", (int)jcr->JobId);
/* NOTE(review): the exit from this failing branch is not visible here */
/* All checks passed: count the reservation on the device ... */
1194 dev->reserved_device++;
1195 Dmsg4(dbglvl, "jid=%u Inc reserve=%d dev=%s %p\n", (int)jcr->JobId, dev->reserved_device,
1196 dev->print_name(), dev);
/* ... and record on the DCR that it holds one */
1197 dcr->reserved_device = true;
1206 * Returns: 1 if drive can be reserved
1207 * 0 if we should wait
1208 * -1 on error or impossibility
/*
 * Decide whether dcr->dev may be reserved for the job dcr->jcr under
 *  the search constraints carried in rctx.
 *
 * Return values visible in this excerpt: 1 (reserve the drive),
 *  0 (wait / not usable now) and -1 (logic error).
 *
 * Most refusals format a numbered message (3605-3609, 3910, 3911)
 *  into jcr->errmsg and pass it to queue_reserve_message().
 *
 * Visible side effects: may update rctx.num_writers and
 *  rctx.low_use_drive, may copy the requested pool name and pool
 *  type into the device, and may call unload_autochanger().
 *
 * NOTE(review): the embedded line numbering has gaps throughout
 *  this function, so a number of short statements (some returns,
 *  else-branches, closing braces, a few call arguments) are not
 *  visible here. Branch outcomes flagged below are unconfirmed.
 */
1210 static int can_reserve_drive(DCR *dcr, RCTX &rctx)
1212 DEVICE *dev = dcr->dev;
1213 JCR *jcr = dcr->jcr;
/*
 * Trace the search flags.
 * NOTE(review): the format has six conversions but only five
 *  arguments are visible; the job id argument is presumably on a
 *  line missing from this excerpt.
 */
1215 Dmsg6(dbglvl, "jid=%u PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
1217 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
1218 rctx.autochanger_only, rctx.any_drive);
1220 /* setting any_drive overrides PreferMountedVols flag */
1221 if (!rctx.any_drive) {
1223 * When PreferMountedVols is set, we keep track of the
1224 * drive in use that has the least number of writers, then if
1225 * no unmounted drive is found, we try that drive. This
1226 * helps spread the load to the least used drives.
/* This device is the remembered low-use drive and we were told to try it */
1228 if (rctx.try_low_use_drive && dev == rctx.low_use_drive) {
1229 Dmsg3(dbglvl, "jid=%u OK dev=%s == low_drive=%s.\n",
1230 jcr->JobId, dev->print_name(), rctx.low_use_drive->print_name());
/* NOTE(review): result of this branch not visible; trace text suggests acceptance */
1233 /* If he wants a free drive, but this one is busy, no go */
1234 if (!rctx.PreferMountedVols && dev->is_busy()) {
1235 /* Save least used drive */
/* Load is measured as num_writers + reserved_device; keep the smallest seen in rctx */
1236 if ((dev->num_writers + dev->reserved_device) < rctx.num_writers) {
1237 rctx.num_writers = dev->num_writers + dev->reserved_device;
1238 rctx.low_use_drive = dev;
1239 Dmsg3(dbglvl, "jid=%u set low use drive=%s num_writers=%d\n",
1240 (int)jcr->JobId, dev->print_name(), rctx.num_writers);
1242 Dmsg2(dbglvl, "jid=%u not low use num_writers=%d\n",
1243 (int)jcr->JobId, dev->num_writers+dev->reserved_device);
1245 Dmsg1(dbglvl, "jid=%u failed: !prefMnt && busy.\n", jcr->JobId);
1246 Mmsg(jcr->errmsg, _("3605 JobId=%u wants free drive but device %s is busy.\n"),
1247 jcr->JobId, dev->print_name());
1248 queue_reserve_message(jcr);
/* NOTE(review): the return for this refusal is not visible here */
1252 /* Check for prefer mounted volumes */
/* Applies only to tape devices that currently have no dev->vol */
1253 if (rctx.PreferMountedVols && !dev->vol && dev->is_tape()) {
1254 Mmsg(jcr->errmsg, _("3606 JobId=%u prefers mounted drives, but drive %s has no Volume.\n"),
1255 jcr->JobId, dev->print_name());
1256 queue_reserve_message(jcr);
1257 Dmsg1(dbglvl, "jid=%u failed: want mounted -- no vol\n", (uint32_t)jcr->JobId);
1258 return 0; /* No volume mounted */
/*
 * Exact-match section: the wanted name rctx.VolumeName must equal
 *  either dev->VolHdr.VolumeName or, when dev->vol is set,
 *  dev->vol->vol_name; a mismatch queues message 3607. A volume
 *  reported by is_volume_in_use() yields 0.
 * NOTE(review): the declaration of ok, the test on it, one Mmsg
 *  argument and the mismatch return are not visible here.
 */
1261 /* Check for exact Volume name match */
1262 /* ***FIXME*** for Disk, we can accept any volume that goes with this
1265 if (rctx.exact_match && rctx.have_volume) {
1267 Dmsg6(dbglvl, "jid=%u PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n",
1269 rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device,
1270 rctx.autochanger_only, rctx.any_drive);
1271 Dmsg5(dbglvl, "jid=%u have_vol=%d have=%s resvol=%s want=%s\n",
1272 (int)jcr->JobId, rctx.have_volume, dev->VolHdr.VolumeName,
1273 dev->vol?dev->vol->vol_name:"*none*", rctx.VolumeName);
1274 ok = strcmp(dev->VolHdr.VolumeName, rctx.VolumeName) == 0 ||
1275 (dev->vol && strcmp(dev->vol->vol_name, rctx.VolumeName) == 0);
1277 Mmsg(jcr->errmsg, _("3607 JobId=%u wants Vol=\"%s\" drive has Vol=\"%s\" on drive %s.\n"),
1278 jcr->JobId, rctx.VolumeName, dev->VolHdr.VolumeName,
1280 queue_reserve_message(jcr);
1281 Dmsg4(dbglvl, "jid=%u not OK: dev have=%s resvol=%s want=%s\n",
1282 (int)jcr->JobId, dev->VolHdr.VolumeName,
1283 dev->vol?dev->vol->vol_name:"*none*", rctx.VolumeName);
1286 if (is_volume_in_use(dcr)) {
1287 return 0; /* fail if volume on another drive */
1292 /* Check for unused autochanger drive */
/* Idle drive with an empty VolHdr.VolumeName: claim it by stamping our pool on the device */
1293 if (rctx.autochanger_only && !dev->is_busy() &&
1294 dev->VolHdr.VolumeName[0] == 0) {
1295 /* Device is available but not yet reserved, reserve it for us */
1296 Dmsg2(dbglvl, "jid=%u OK Res Unused autochanger %s.\n",
1297 jcr->JobId, dev->print_name());
1298 bstrncpy(dev->pool_name, dcr->pool_name, sizeof(dev->pool_name));
1299 bstrncpy(dev->pool_type, dcr->pool_type, sizeof(dev->pool_type));
1300 return 1; /* reserve drive */
1304 * Handle the case that there are no writers
1306 if (dev->num_writers == 0) {
1307 /* Now check if there are any reservations on the drive */
1308 if (dev->reserved_device) {
1309 /* Now check if we want the same Pool and pool type */
/* Both the pool name and the pool type must compare equal */
1310 if (strcmp(dev->pool_name, dcr->pool_name) == 0 &&
1311 strcmp(dev->pool_type, dcr->pool_type) == 0) {
1312 /* OK, compatible device */
1313 Dmsg2(dbglvl, "jid=%u OK dev: %s num_writers=0, reserved, pool matches\n",
1314 jcr->JobId, dev->print_name());
/* NOTE(review): result of the matching branch is not visible here */
1317 /* Drive Pool not suitable for us */
1318 Mmsg(jcr->errmsg, _(
1319 "3608 JobId=%u wants Pool=\"%s\" but have Pool=\"%s\" nreserve=%d on drive %s.\n"),
1320 jcr->JobId, dcr->pool_name, dev->pool_name,
1321 dev->reserved_device, dev->print_name());
1322 queue_reserve_message(jcr);
1323 Dmsg3(dbglvl, "jid=%u failed: busy num_writers=0, reserved, pool=%s wanted=%s\n",
1324 (int)jcr->JobId, dev->pool_name, dcr->pool_name);
1325 return 0; /* wait */
/* No reservations, but the device is already in append mode */
1327 } else if (dev->can_append()) {
1328 /* Device in append mode, check if changing pool */
1329 if (strcmp(dev->pool_name, dcr->pool_name) == 0 &&
1330 strcmp(dev->pool_type, dcr->pool_type) == 0) {
1331 Dmsg2(dbglvl, "jid=%u OK dev: %s num_writers=0, can_append, pool matches.\n",
1332 jcr->JobId, dev->print_name());
1333 /* OK, compatible device */
/* NOTE(review): result of the matching branch is not visible here */
1336 /* Changing pool, unload old tape if any in drive */
/* NOTE(review): the argument line of this Dmsg1 is missing from the excerpt */
1337 Dmsg1(dbglvl, "jid=%u OK dev: num_writers=0, not reserved, pool change, unload changer\n",
1339 unload_autochanger(dcr, 0);
1342 /* Device is available but not yet reserved, reserve it for us */
/* Record the requested pool on the device so later requests are compared against it */
1343 Dmsg2(dbglvl, "jid=%u OK Dev avail reserved %s\n", jcr->JobId, dev->print_name());
1344 bstrncpy(dev->pool_name, dcr->pool_name, sizeof(dev->pool_name));
1345 bstrncpy(dev->pool_type, dcr->pool_type, sizeof(dev->pool_type));
1346 return 1; /* reserve drive */
1350 * Check if the device is in append mode with writers (i.e.
1351 * available if pool is the same).
1353 if (dev->can_append() || dev->num_writers > 0) {
1354 /* Yes, now check if we want the same Pool and pool type */
1355 if (strcmp(dev->pool_name, dcr->pool_name) == 0 &&
1356 strcmp(dev->pool_type, dcr->pool_type) == 0) {
1357 Dmsg2(dbglvl, "jid=%u OK dev: %s num_writers>=0, can_append, pool matches.\n",
1358 jcr->JobId, dev->print_name());
1359 /* OK, compatible device */
/* NOTE(review): result of the matching branch is not visible here */
1362 /* Drive Pool not suitable for us */
1363 Mmsg(jcr->errmsg, _("3609 JobId=%u wants Pool=\"%s\" but has Pool=\"%s\" on drive %s.\n"),
1364 jcr->JobId, dcr->pool_name, dev->pool_name, dev->print_name());
1365 queue_reserve_message(jcr);
1366 Dmsg3(dbglvl, "jid=%u failed: busy num_writers>0, can_append, pool=%s wanted=%s\n",
1367 (int)jcr->JobId, dev->pool_name, dcr->pool_name);
1368 return 0; /* wait */
/* Unexpected device state: print it, queue message 3910, raise an M_FATAL job message, return -1 */
1371 Pmsg1(000, _("Logic error!!!! JobId=%u Should not get here.\n"), (int)jcr->JobId);
1372 Mmsg(jcr->errmsg, _("3910 JobId=%u Logic error!!!! drive %s Should not get here.\n"),
1373 jcr->JobId, dev->print_name());
1374 queue_reserve_message(jcr);
1375 Jmsg0(jcr, M_FATAL, 0, _("Logic error!!!! Should not get here.\n"));
1376 return -1; /* error, should not get here */
/* Final fall-through refusal; NOTE(review): its return statement is not visible here */
1378 Mmsg(jcr->errmsg, _("3911 JobId=%u failed reserve drive %s.\n"),
1379 jcr->JobId, dev->print_name());
1380 queue_reserve_message(jcr);
1381 Dmsg2(dbglvl, "jid=%u failed: No reserve %s\n", jcr->JobId, dev->print_name());
1386 * search_lock is already set on entering this routine
/*
 * Append the text currently in jcr->errmsg to the job's
 *  jcr->reserve_msgs list, after scanning the list for an entry
 *  whose first four characters match.
 *
 * The stored entry is a bstrdup() copy of jcr->errmsg.
 *
 * NOTE(review): the embedded line numbering has gaps here; the
 *  declarations of i and msg, any guard on msgs or msg, and the
 *  action taken when a duplicate is found are not visible.
 *  Judging by the comments, a duplicate is presumably not
 *  inserted -- confirm against the complete file.
 */
1388 static void queue_reserve_message(JCR *jcr)
1391 alist *msgs = jcr->reserve_msgs;
1398 * Look for duplicate message. If found, do
/* Scan from the last entry back to the first */
1401 for (i=msgs->size()-1; i >= 0; i--) {
1402 msg = (char *)msgs->get(i);
1406 /* Comparison based on 4 digit message number */
/* Only the leading four characters are compared, not the whole text */
1407 if (strncmp(msg, jcr->errmsg, 4) == 0) {
1411 /* Message unique, so insert it */
1412 jcr->reserve_msgs->push(bstrdup(jcr->errmsg));
1416 * Send any reservation messages queued for this jcr
/*
 * Pass each message stored in jcr->reserve_msgs to the caller's
 *  sendit callback.
 *
 *  jcr     job whose reserve_msgs list is read
 *  sendit  callback invoked as sendit(text, length, arg); called
 *          once with a short prefix and once with the message
 *  arg     opaque pointer forwarded unchanged to sendit
 *
 * The list is walked from the last entry to the first, between
 *  lock_reservations() and unlock_reservations().
 *
 * NOTE(review): the embedded line numbering has gaps here; the
 *  local declarations, the exit taken for an empty list and any
 *  test on msg are not visible in this excerpt.
 */
1418 void send_drive_reserve_messages(JCR *jcr, void sendit(const char *msg, int len, void *sarg), void *arg)
1424 lock_reservations();
1425 msgs = jcr->reserve_msgs;
/* Nothing to send when the list is absent or empty */
1426 if (!msgs || msgs->size() == 0) {
1429 for (i=msgs->size()-1; i >= 0; i--) {
1430 msg = (char *)msgs->get(i);
/*
 * NOTE(review): a length of 3 is passed with a literal that shows
 *  a single space here; if the literal really is one character
 *  the callback reads past it. Confirm it holds three spaces.
 */
1432 sendit(" ", 3, arg);
1433 sendit(msg, strlen(msg), arg);
1440 unlock_reservations();