X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fstored%2Facquire.c;h=58ed74db62c0d7486f2ed562f21952065fc5b996;hb=d097e158adcfd4e2c3fe874870fd380dcc558c7b;hp=5748591267945ee78b4871b5e7422e8b963c8466;hpb=46205bab364750557e69071ba7d60b754069a334;p=bacula%2Fbacula diff --git a/bacula/src/stored/acquire.c b/bacula/src/stored/acquire.c index 5748591267..58ed74db62 100644 --- a/bacula/src/stored/acquire.c +++ b/bacula/src/stored/acquire.c @@ -1,21 +1,14 @@ -/* - * Routines to acquire and release a device for read/write - * - * Kern Sibbald, August MMII - * - * Version $Id$ - */ /* Bacula® - The Network Backup Solution - Copyright (C) 2002-2007 Free Software Foundation Europe e.V. + Copyright (C) 2002-2008 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. This program is Free Software; you can redistribute it and/or modify it under the terms of version two of the GNU General Public - License as published by the Free Software Foundation plus additions - that are listed in the file LICENSE. + License as published by the Free Software Foundation and included + in the file LICENSE. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -32,12 +25,20 @@ (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, Switzerland, email:ftf@fsfeurope.org. */ +/* + * Routines to acquire and release a device for read/write + * + * Kern Sibbald, August MMII + * + * Version $Id$ + */ #include "bacula.h" /* pull in global headers */ #include "stored.h" /* pull in Storage Deamon headers */ /* Forward referenced functions */ static void attach_dcr_to_dev(DCR *dcr); +static bool is_tape_position_ok(JCR *jcr, DEVICE *dev); /********************************************************************* @@ -62,8 +63,8 @@ bool acquire_device_for_read(DCR *dcr) int vol_label_status; int retry = 0; - Dmsg1(50, "jcr->dcr=%p\n", jcr->dcr); - dev->block(BST_DOING_ACQUIRE); + Dmsg1(950, "jcr->dcr=%p\n", jcr->dcr); + dev->dblock(BST_DOING_ACQUIRE); if (dev->num_writers > 0) { Jmsg2(jcr, M_FATAL, 0, _("Acquire read: num_writers=%d not zero. Job %d canceled.\n"), @@ -97,16 +98,29 @@ bool acquire_device_for_read(DCR *dcr) * same as the current drive, we attempt to find the same * device that was used to write the orginal volume. If * found, we switch to using that device. + * + * N.B. A lot of routines rely on the dcr pointer not changing + * read_records.c even has multiple dcrs cached, so we take care + * here to release all important parts of the dcr and re-acquire + * them such as the block pointer (size may change), but we do + * not release the dcr. */ - Dmsg2(100, "MediaType dcr=%s dev=%s\n", dcr->media_type, dev->device->media_type); + Dmsg2(50, "MediaType dcr=%s dev=%s\n", dcr->media_type, dev->device->media_type); if (dcr->media_type[0] && strcmp(dcr->media_type, dev->device->media_type) != 0) { RCTX rctx; DIRSTORE *store; int stat; - DCR *dcr_save = jcr->dcr; + + Jmsg3(jcr, M_INFO, 0, _("Changing device. Want Media Type=\"%s\" have=\"%s\"\n" + " device=%s\n"), + dcr->media_type, dev->device->media_type, dev->print_name()); + Dmsg3(50, "Changing device. Want Media Type=\"%s\" have=\"%s\"\n" + " device=%s\n", + dcr->media_type, dev->device->media_type, dev->print_name()); + + dev->dunblock(DEV_UNLOCKED); lock_reservations(); - jcr->dcr = NULL; memset(&rctx, 0, sizeof(RCTX)); rctx.jcr = jcr; jcr->reserve_msgs = New(alist(10, not_owned_by_alist)); @@ -120,44 +134,35 @@ bool acquire_device_for_read(DCR *dcr) bstrncpy(store->pool_type, dcr->pool_type, sizeof(store->pool_type)); store->append = false; rctx.store = store; + clean_device(dcr); /* clean up the dcr */ /* - * Note, if search_for_device() succeeds, we get a new_dcr, - * which we do not use except for the dev info. + * Search for a new device */ stat = search_res_for_device(rctx); - release_msgs(jcr); /* release queued messages */ + release_reserve_messages(jcr); /* release queued messages */ unlock_reservations(); + if (stat == 1) { - DCR *new_dcr = jcr->read_dcr; - dev->unblock(); - detach_dcr_from_dev(dcr); /* release old device */ - /* Copy important info from the new dcr */ - dev = dcr->dev = new_dcr->dev; - jcr->read_dcr = dcr; - dcr->device = new_dcr->device; - dcr->max_job_spool_size = dcr->device->max_job_spool_size; - attach_dcr_to_dev(dcr); - new_dcr->VolumeName[0] = 0; - free_dcr(new_dcr); - dev->block(BST_DOING_ACQUIRE); + dev = dcr->dev; /* get new device pointer */ + dev->dblock(BST_DOING_ACQUIRE); + dcr->VolumeName[0] = 0; Jmsg(jcr, M_INFO, 0, _("Media Type change. New device %s chosen.\n"), dev->print_name()); + Dmsg1(50, "Media Type change. New device %s chosen.\n", dev->print_name()); + bstrncpy(dcr->VolumeName, vol->VolumeName, sizeof(dcr->VolumeName)); bstrncpy(dcr->media_type, vol->MediaType, sizeof(dcr->media_type)); dcr->VolCatInfo.Slot = vol->Slot; bstrncpy(dcr->pool_name, store->pool_name, sizeof(dcr->pool_name)); bstrncpy(dcr->pool_type, store->pool_type, sizeof(dcr->pool_type)); - } else if (stat == 0) { /* device busy */ - Pmsg1(000, "Device %s is busy.\n", vol->device); } else { /* error */ Jmsg1(jcr, M_FATAL, 0, _("No suitable device found to read Volume \"%s\"\n"), vol->VolumeName); - jcr->dcr = dcr_save; + Dmsg1(50, "No suitable device found to read Volume \"%s\"\n", vol->VolumeName); goto get_out; } - jcr->dcr = dcr_save; } @@ -200,11 +205,10 @@ bool acquire_device_for_read(DCR *dcr) dev->print_name(), dcr->VolumeName, dev->bstrerror()); goto default_path; } - Dmsg1(100, "opened dev %s OK\n", dev->print_name()); + Dmsg1(50, "opened dev %s OK\n", dev->print_name()); /* Read Volume Label */ - - Dmsg0(200, "calling read-vol-label\n"); + Dmsg0(50, "calling read-vol-label\n"); vol_label_status = read_dev_volume_label(dcr); switch (vol_label_status) { case VOL_OK: @@ -259,7 +263,7 @@ default_path: /* Mount a specific volume and no other */ Dmsg0(200, "calling dir_ask_sysop\n"); - if (!dir_ask_sysop_to_mount_volume(dcr)) { + if (!dir_ask_sysop_to_mount_volume(dcr, ST_READ)) { goto get_out; /* error return */ } try_autochanger = true; /* permit using autochanger again */ @@ -281,15 +285,21 @@ default_path: dcr->VolumeName, dev->print_name()); get_out: - P(dev->mutex); - if (dcr->reserved_device) { + dev->dlock(); + if (dcr && dcr->reserved_device) { dev->reserved_device--; - Dmsg2(100, "Dec reserve=%d dev=%s\n", dev->reserved_device, dev->print_name()); + Dmsg2(50, "Dec reserve=%d dev=%s\n", dev->reserved_device, dev->print_name()); dcr->reserved_device = false; } - V(dev->mutex); - dev->unblock(); - Dmsg1(50, "jcr->dcr=%p\n", jcr->dcr); + /* + * Normally we are blocked, but in at least one error case above + * we are not blocked because we unsuccessfully tried changing + * devices. + */ + if (dev->is_blocked()) { + dev->dunblock(DEV_LOCKED); + } + Dmsg1(950, "jcr->dcr=%p\n", jcr->dcr); return ok; } @@ -305,16 +315,13 @@ get_out: */ DCR *acquire_device_for_append(DCR *dcr) { - bool release = false; - bool recycle = false; - bool do_mount = false; DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; init_device_wait_timers(dcr); - dev->block(BST_DOING_ACQUIRE); - Dmsg1(190, "acquire_append device is %s\n", dev->is_tape()?"tape": + dev->dblock(BST_DOING_ACQUIRE); + Dmsg1(100, "acquire_append device is %s\n", dev->is_tape()?"tape": (dev->is_dvd()?"DVD":"disk")); /* @@ -326,76 +333,33 @@ DCR *acquire_device_for_append(DCR *dcr) goto get_out; } - if (dev->can_append()) { + /* + * have_vol defines whether or not mount_next_write_volume should + * ask the Director again about what Volume to use. + */ + if (dev->can_append() && dcr->is_suitable_volume_mounted() && + strcmp(dcr->VolCatInfo.VolCatStatus, "Recycle") != 0) { Dmsg0(190, "device already in append.\n"); /* - * Device already in append mode - * - * Check if we have the right Volume mounted - * OK if current volume info OK - * OK if next volume matches current volume - * otherwise mount desired volume obtained from - * dir_find_next_appendable_volume - * dev->VolHdr.VolumeName is what is in the drive - * dcr->VolumeName is what we pass into the routines, or - * get back from the subroutines. + * At this point, the correct tape is already mounted, so + * we do not need to do mount_next_write_volume(), unless + * we need to recycle the tape. */ - bstrncpy(dcr->VolumeName, dev->VolHdr.VolumeName, sizeof(dcr->VolumeName)); - if (!dir_get_volume_info(dcr, GET_VOL_INFO_FOR_WRITE) && - !(dir_find_next_appendable_volume(dcr) && - strcmp(dev->VolHdr.VolumeName, dcr->VolumeName) == 0)) { /* wrong tape mounted */ - Dmsg2(190, "Wrong tape mounted: %s. wants:%s\n", dev->VolHdr.VolumeName, - dcr->VolumeName); - /* Release volume reserved by dir_find_next_appendable_volume() */ - if (dcr->VolumeName[0]) { - free_unused_volume(dcr); - } - if (dev->num_writers != 0) { - Jmsg3(jcr, M_FATAL, 0, _("Wanted to append to Volume \"%s\", but device %s is busy writing on \"%s\" .\n"), - dcr->VolumeName, dev->print_name(), dev->VolHdr.VolumeName); - Dmsg3(200, "Wanted to append to Volume \"%s\", but device %s is busy writing on \"%s\" .\n", - dcr->VolumeName, dev->print_name(), dev->VolHdr.VolumeName); - goto get_out; - } - /* Wrong tape mounted, release it, then fall through to get correct one */ - Dmsg0(190, "Wrong tape mounted, release and try mount.\n"); - release = true; - do_mount = true; - } else { - /* - * At this point, the correct tape is already mounted, so - * we do not need to do mount_next_write_volume(), unless - * we need to recycle the tape. - */ - recycle = strcmp(dcr->VolCatInfo.VolCatStatus, "Recycle") == 0; - Dmsg1(190, "Correct tape mounted. recycle=%d\n", recycle); - if (recycle && dev->num_writers != 0) { - Jmsg(jcr, M_FATAL, 0, _("Cannot recycle volume \"%s\"" - " on device %s because it is in use by another job.\n"), - dev->VolHdr.VolumeName, dev->print_name()); - goto get_out; - } - if (dev->num_writers == 0) { - memcpy(&dev->VolCatInfo, &dcr->VolCatInfo, sizeof(dev->VolCatInfo)); - } - } + if (dev->num_writers == 0) { + memcpy(&dev->VolCatInfo, &dcr->VolCatInfo, sizeof(dev->VolCatInfo)); + } + if (!is_tape_position_ok(jcr, dev)) { + goto get_out; + } } else { - /* Not already in append mode, so mount the device */ - Dmsg0(190, "Not in append mode, try mount.\n"); - ASSERT(dev->num_writers == 0); - do_mount = true; - } - - if (do_mount || recycle) { - Dmsg0(190, "Do mount_next_write_vol\n"); - bool mounted = mount_next_write_volume(dcr, release); - if (!mounted) { + Dmsg1(190, "jid=%u Do mount_next_write_vol\n", (uint32_t)jcr->JobId); + if (!dcr->mount_next_write_volume()) { if (!job_canceled(jcr)) { /* Reduce "noise" -- don't print if job canceled */ Jmsg(jcr, M_FATAL, 0, _("Could not ready device %s for append.\n"), dev->print_name()); - Dmsg1(200, "Could not ready device %s for append.\n", - dev->print_name()); + Dmsg2(200, "jid=%u Could not ready device %s for append.\n", + (uint32_t)jcr->JobId, dev->print_name()); } goto get_out; } @@ -407,32 +371,59 @@ DCR *acquire_device_for_append(DCR *dcr) jcr->NumWriteVolumes = 1; } dev->VolCatInfo.VolCatJobs++; /* increment number of jobs on vol */ - dir_update_volume_info(dcr, false); /* send Volume info to Director */ - P(dev->mutex); + dir_update_volume_info(dcr, false, false); /* send Volume info to Director */ + dev->dlock(); if (dcr->reserved_device) { dev->reserved_device--; - Dmsg2(100, "Dec reserve=%d dev=%s\n", dev->reserved_device, dev->print_name()); + Dmsg3(100, "jid=%u Dec reserve=%d dev=%s\n", (uint32_t)jcr->JobId, + dev->reserved_device, dev->print_name()); dcr->reserved_device = false; } - V(dev->mutex); - dev->unblock(); + dev->dunblock(DEV_LOCKED); return dcr; /* * Error return */ get_out: - P(dev->mutex); + dev->dlock(); if (dcr->reserved_device) { dev->reserved_device--; - Dmsg2(100, "Dec reserve=%d dev=%s\n", dev->reserved_device, dev->print_name()); + Dmsg3(100, "jid=%u Dec reserve=%d dev=%s\n", (uint32_t)jcr->JobId, + dev->reserved_device, dev->print_name()); dcr->reserved_device = false; } - V(dev->mutex); - dev->unblock(); + dev->dunblock(DEV_LOCKED); return NULL; } +/* + * Insanity check + * + * Check to see if the tape position as defined by the OS is + * the same as our concept. If it is not, we bail out, because + * it means the user has probably manually rewound the tape. + * Note, we check only if num_writers == 0, but this code will + * also work fine for any number of writers. If num_writers > 0, + * we probably should cancel all jobs using this device, or + * perhaps even abort the SD, or at a minimum, mark the tape + * in error. Another strategy with num_writers == 0, would be + * to rewind the tape and do a new eod() request. + */ +static bool is_tape_position_ok(JCR *jcr, DEVICE *dev) +{ + if (dev->is_tape() && dev->num_writers == 0) { + int32_t file = dev->get_os_tape_file(); + if (file >= 0 && file != (int32_t)dev->get_file()) { + Jmsg(jcr, M_FATAL, 0, _("Invalid tape position on volume \"%s\"" + " on device %s. Expected %d, got %d\n"), + dev->VolHdr.VolumeName, dev->print_name(), dev->get_file(), file); + return false; + } + } + return true; +} + /* * This job is done, so release the device. From a Unix standpoint, @@ -449,9 +440,10 @@ bool release_device(DCR *dcr) bool ok = true; /* lock only if not already locked by this thread */ - if (!dcr->dev_locked) { - lock_device(dev); + if (!dcr->is_dev_locked()) { + dev->r_dlock(); } + lock_volumes(); Dmsg2(100, "release_device device %s is %s\n", dev->print_name(), dev->is_tape()?"tape":"disk"); /* if device is reserved, job never started, so release the reserve here */ @@ -464,7 +456,8 @@ bool release_device(DCR *dcr) if (dev->can_read()) { dev->clear_read(); /* clear read bit */ Dmsg0(100, "dir_update_vol_info. Release0\n"); - dir_update_volume_info(dcr, false); /* send Volume info to Director */ + dir_update_volume_info(dcr, false, false); /* send Volume info to Director */ + volume_unused(dcr); } else if (dev->num_writers > 0) { /* @@ -476,12 +469,13 @@ bool release_device(DCR *dcr) dev->num_writers--; Dmsg1(100, "There are %d writers in release_device\n", dev->num_writers); if (dev->is_labeled()) { - Dmsg0(100, "dir_create_jobmedia_record. Release\n"); + Dmsg2(200, "dir_create_jobmedia. Release vol=%s dev=%s\n", + dev->VolCatInfo.VolCatName, dev->print_name()); if (!dev->at_weot() && !dir_create_jobmedia_record(dcr)) { Jmsg(jcr, M_FATAL, 0, _("Could not create JobMedia record for Volume=\"%s\" Job=%s\n"), dcr->VolCatInfo.VolCatName, jcr->Job); } - /* If no more writers, write an EOF */ + /* If no more writers, and no errors, and wrote something, write an EOF */ if (!dev->num_writers && dev->can_write() && dev->block_num > 0) { dev->weof(1); write_ansi_ibm_labels(dcr, ANSI_EOF_LABEL, dev->VolHdr.VolumeName); @@ -489,8 +483,12 @@ bool release_device(DCR *dcr) if (!dev->at_weot()) { dev->VolCatInfo.VolCatFiles = dev->file; /* set number of files */ /* Note! do volume update before close, which zaps VolCatInfo */ - Dmsg0(100, "dir_update_vol_info. Release0\n"); - dir_update_volume_info(dcr, false); /* send Volume info to Director */ + dir_update_volume_info(dcr, false, false); /* send Volume info to Director */ + Dmsg2(200, "dir_update_vol_info. Release vol=%s dev=%s\n", + dev->VolCatInfo.VolCatName, dev->print_name()); + } + if (!dev->num_writers) { /* if no more writers */ + volume_unused(dcr); /* we obviously are not using the volume */ } } @@ -500,7 +498,9 @@ bool release_device(DCR *dcr) * has failed, since the device is not in read mode and * there are no writers. It was probably reserved. */ + volume_unused(dcr); } + unlock_volumes(); /* If no writers, close if file or !CAP_ALWAYS_OPEN */ if (dev->num_writers == 0 && (!dev->is_tape() || !dev->has_cap(CAP_ALWAYSOPEN))) { @@ -528,42 +528,89 @@ bool release_device(DCR *dcr) if (status != 0) { berrno be; Jmsg(jcr, M_ALERT, 0, _("3997 Bad alert command: %s: ERR=%s.\n"), - alert, be.strerror(status)); + alert, be.bstrerror(status)); } Dmsg1(400, "alert status=%d\n", status); free_pool_memory(alert); } - dcr->dev_locked = false; /* set no longer locked */ - unlock_device(dev); - if (jcr->read_dcr == dcr) { - jcr->read_dcr = NULL; - } - if (jcr->dcr == dcr) { - jcr->dcr = NULL; + pthread_cond_broadcast(&dev->wait_next_vol); + Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)jcr->JobId); + pthread_cond_broadcast(&wait_device_release); + dev->dunlock(); + if (dcr->keep_dcr) { + detach_dcr_from_dev(dcr); + } else { + if (jcr->read_dcr == dcr) { + jcr->read_dcr = NULL; + } + if (jcr->dcr == dcr) { + jcr->dcr = NULL; + } + free_dcr(dcr); } - free_dcr(dcr); + Dmsg2(100, "===== Device %s released by JobId=%u\n", dev->print_name(), + (uint32_t)jcr->JobId); + return ok; +} + +/* + * Clean up the device for reuse without freeing the memory + */ +bool clean_device(DCR *dcr) +{ + bool ok; + dcr->keep_dcr = true; /* do not free the dcr */ + ok = release_device(dcr); + dcr->keep_dcr = false; return ok; } /* * Create a new Device Control Record and attach * it to the device (if this is a real job). + * Note, this has been updated so that it can be called first + * without a DEVICE, then a second or third time with a DEVICE, + * and each time, it should cleanup and point to the new device. + * This should facilitate switching devices. + * Note, each dcr must point to the controlling job (jcr). However, + * a job can have multiple dcrs, so we must not store in the jcr's + * structure as previously. The higher level routine must store + * this dcr in the right place + * */ -DCR *new_dcr(JCR *jcr, DEVICE *dev) +DCR *new_dcr(JCR *jcr, DCR *dcr, DEVICE *dev) { - DCR *dcr = (DCR *)malloc(sizeof(DCR)); - memset(dcr, 0, sizeof(DCR)); - dcr->jcr = jcr; + if (!dcr) { + dcr = (DCR *)malloc(sizeof(DCR)); + memset(dcr, 0, sizeof(DCR)); + dcr->tid = pthread_self(); + dcr->spool_fd = -1; + } + dcr->jcr = jcr; /* point back to jcr */ + /* Set device information, possibly change device */ if (dev) { - dcr->dev = dev; - dcr->device = dev->device; + if (dcr->block) { + free_block(dcr->block); + } dcr->block = new_block(dev); + if (dcr->rec) { + free_record(dcr->rec); + } dcr->rec = new_record(); - dcr->max_job_spool_size = dev->device->max_job_spool_size; + if (dcr->attached_to_dev) { + detach_dcr_from_dev(dcr); + } + /* Use job spoolsize prior to device spoolsize */ + if (jcr->spool_size) { + dcr->max_job_spool_size = jcr->spool_size; + } else { + dcr->max_job_spool_size = dev->device->max_job_spool_size; + } + dcr->device = dev->device; + dcr->dev = dev; attach_dcr_to_dev(dcr); } - dcr->spool_fd = -1; return dcr; } @@ -571,6 +618,9 @@ DCR *new_dcr(JCR *jcr, DEVICE *dev) * Search the dcrs list for the given dcr. If it is found, * as it should be, then remove it. Also zap the jcr pointer * to the dcr if it is the same one. + * + * Note, this code will be turned on when we can write to multiple + * dcrs at the same time. */ #ifdef needed static void remove_dcr_from_dcrs(DCR *dcr) @@ -598,42 +648,28 @@ static void attach_dcr_to_dev(DCR *dcr) DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; - if (!dcr->attached_to_dev && dev->is_open() && jcr && jcr->JobType != JT_SYSTEM) { + if (jcr) Dmsg1(500, "JobId=%u enter attach_dcr_to_dev\n", (uint32_t)jcr->JobId); + if (!dcr->attached_to_dev && dev->initiated && jcr && jcr->JobType != JT_SYSTEM) { dev->attached_dcrs->append(dcr); /* attach dcr to device */ dcr->attached_to_dev = true; + Dmsg1(500, "JobId=%u attach_dcr_to_dev\n", (uint32_t)jcr->JobId); } } void detach_dcr_from_dev(DCR *dcr) { DEVICE *dev = dcr->dev; - - if (dcr->reserved_device) { - dcr->reserved_device = false; - lock_device(dev); - dev->reserved_device--; - Dmsg2(100, "Dec reserve=%d dev=%s\n", dev->reserved_device, dev->print_name()); - dcr->reserved_device = false; - /* If we set read mode in reserving, remove it */ - if (dev->can_read()) { - dev->clear_read(); - } - if (dev->num_writers < 0) { - Jmsg1(dcr->jcr, M_ERROR, 0, _("Hey! num_writers=%d!!!!\n"), dev->num_writers); - dev->num_writers = 0; - } - unlock_device(dev); - } + Dmsg0(500, "Enter detach_dcr_from_dev\n"); /* jcr is NULL in some cases */ /* Detach this dcr only if attached */ - if (dcr->attached_to_dev) { + if (dcr->attached_to_dev && dev) { + dev->dlock(); + unreserve_device(dcr); dcr->dev->attached_dcrs->remove(dcr); /* detach dcr from device */ dcr->attached_to_dev = false; // remove_dcr_from_dcrs(dcr); /* remove dcr from jcr list */ + dev->dunlock(); } - free_unused_volume(dcr); /* free unused vols attached to this dcr */ - pthread_cond_broadcast(&dcr->dev->wait_next_vol); - pthread_cond_broadcast(&wait_device_release); } /* @@ -644,9 +680,7 @@ void free_dcr(DCR *dcr) { JCR *jcr = dcr->jcr; - if (dcr->dev) { - detach_dcr_from_dev(dcr); - } + detach_dcr_from_dev(dcr); if (dcr->block) { free_block(dcr->block);