X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fstored%2Faskdir.c;h=2283834f929479eb821645dc6a63f708e0a841da;hb=ad240c831eef5857ddc5bb0c59a8272323f8fbc9;hp=bd452a7cb74dedc6a6984beace3f8bbf2ef4f159;hpb=3a5e17c3fb18038b3590abb48e4d47c3c1031e34;p=bacula%2Fbacula diff --git a/bacula/src/stored/askdir.c b/bacula/src/stored/askdir.c index bd452a7cb7..2283834f92 100644 --- a/bacula/src/stored/askdir.c +++ b/bacula/src/stored/askdir.c @@ -7,7 +7,7 @@ * Version $Id$ */ /* - Copyright (C) 2000, 2001, 2002 Kern Sibbald and John Walker + Copyright (C) 2000-2003 Kern Sibbald and John Walker This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -30,32 +30,29 @@ #include "stored.h" /* pull in Storage Deamon headers */ /* Requests sent to the Director */ -static char Find_media[] = "CatReq Job=%s FindMedia=%d\n"; -static char Get_Vol_Info[] = "CatReq Job=%s GetVolInfo VolName=%s\n"; - +static char Find_media[] = "CatReq Job=%s FindMedia=%d\n"; +static char Get_Vol_Info[] = "CatReq Job=%s GetVolInfo VolName=%s write=%d\n"; static char Update_media[] = "CatReq Job=%s UpdateMedia VolName=%s\ - VolJobs=%d VolFiles=%d VolBlocks=%d VolBytes=%" lld " VolMounts=%d\ - VolErrors=%d VolWrites=%d VolMaxBytes=%" lld " EndTime=%d VolStatus=%s\ + VolJobs=%u VolFiles=%u VolBlocks=%u VolBytes=%s VolMounts=%u\ + VolErrors=%u VolWrites=%u MaxVolBytes=%s EndTime=%d VolStatus=%s\ Slot=%d relabel=%d\n"; static char Create_job_media[] = "CatReq Job=%s CreateJobMedia \ - FirstIndex=%d LastIndex=%d StartFile=%d EndFile=%d \ - StartBlock=%d EndBlock=%d\n"; - - + FirstIndex=%u LastIndex=%u StartFile=%u EndFile=%u \ + StartBlock=%u EndBlock=%u\n"; static char FileAttributes[] = "UpdCat Job=%s FileAttributes "; - -static char Job_status[] = "3012 Job %s jobstatus %d\n"; +static char Job_status[] = "3012 Job %s jobstatus %d\n"; /* Responses received from the Director */ -static char OK_media[] = "1000 OK VolName=%127s VolJobs=%d VolFiles=%d\ - VolBlocks=%d VolBytes=%" lld " VolMounts=%d VolErrors=%d VolWrites=%d\ - VolMaxBytes=%" lld " VolCapacityBytes=%" lld " VolStatus=%20s\ - Slot=%d\n"; - +static char OK_media[] = "1000 OK VolName=%127s VolJobs=%u VolFiles=%u\ + VolBlocks=%u VolBytes=%" lld " VolMounts=%u VolErrors=%u VolWrites=%u\ + MaxVolBytes=%" lld " VolCapacityBytes=%" lld " VolStatus=%20s\ + Slot=%d MaxVolJobs=%u MaxVolFiles=%u\n"; static char OK_update[] = "1000 OK UpdateMedia\n"; +/* Forward referenced functions */ +static int device_wait(JCR *jcr, DEVICE *dev, int wait_sec); /* * Send current JobStatus to Director @@ -78,24 +75,27 @@ static int do_request_volume_info(JCR *jcr) jcr->VolumeName[0] = 0; /* No volume */ if (bnet_recv(dir) <= 0) { - Dmsg0(130, "getvolname error bnet_recv\n"); + Dmsg0(200, "getvolname error bnet_recv\n"); + Mmsg(&jcr->errmsg, _("Network error on bnet_recv in req_vol_info.\n")); return 0; } if (sscanf(dir->msg, OK_media, vol->VolCatName, &vol->VolCatJobs, &vol->VolCatFiles, - &vol->VolCatBlocks, &vol->VolCatBytes, + &vol->VolCatBlocks, &vol->VolCatBytes, &vol->VolCatMounts, &vol->VolCatErrors, - &vol->VolCatWrites, &vol->VolCatMaxBytes, + &vol->VolCatWrites, &vol->VolCatMaxBytes, &vol->VolCatCapacityBytes, vol->VolCatStatus, - &vol->Slot) != 12) { - Dmsg1(130, "Bad response from Dir: %s\n", dir->msg); + &vol->Slot, &vol->VolCatMaxJobs, &vol->VolCatMaxFiles) != 14) { + + Dmsg1(200, "Bad response from Dir: %s\n", dir->msg); + Mmsg(&jcr->errmsg, _("Error scanning Dir response: %s\n"), dir->msg); return 0; } unbash_spaces(vol->VolCatName); - strcpy(jcr->VolumeName, vol->VolCatName); /* set desired VolumeName */ + pm_strcpy(&jcr->VolumeName, vol->VolCatName); /* set desired VolumeName */ - Dmsg2(130, "do_reqest_vol_info got slot=%d Volume=%s\n", - vol->Slot, vol->VolCatName); + Dmsg2(200, "do_reqest_vol_info got slot=%d Volume=%s\n", + vol->Slot, vol->VolCatName); return 1; } @@ -110,14 +110,14 @@ static int do_request_volume_info(JCR *jcr) * * Volume information returned in jcr */ -int dir_get_volume_info(JCR *jcr) +int dir_get_volume_info(JCR *jcr, int writing) { BSOCK *dir = jcr->dir_bsock; strcpy(jcr->VolCatInfo.VolCatName, jcr->VolumeName); Dmsg1(200, "dir_get_volume_info=%s\n", jcr->VolCatInfo.VolCatName); bash_spaces(jcr->VolCatInfo.VolCatName); - bnet_fsend(dir, Get_Vol_Info, jcr->Job, jcr->VolCatInfo.VolCatName); + bnet_fsend(dir, Get_Vol_Info, jcr->Job, jcr->VolCatInfo.VolCatName, writing); return do_request_volume_info(jcr); } @@ -149,20 +149,25 @@ int dir_update_volume_info(JCR *jcr, VOLUME_CAT_INFO *vol, int relabel) { BSOCK *dir = jcr->dir_bsock; time_t EndTime = time(NULL); + char ed1[50], ed2[50]; if (vol->VolCatName[0] == 0) { Jmsg0(jcr, M_ERROR, 0, _("NULL Volume name. This shouldn't happen!!!\n")); return 0; } + bash_spaces(vol->VolCatName); bnet_fsend(dir, Update_media, jcr->Job, vol->VolCatName, vol->VolCatJobs, vol->VolCatFiles, - vol->VolCatBlocks, vol->VolCatBytes, + vol->VolCatBlocks, edit_uint64(vol->VolCatBytes, ed1), vol->VolCatMounts, vol->VolCatErrors, - vol->VolCatWrites, vol->VolCatMaxBytes, EndTime, - vol->VolCatStatus, vol->Slot, relabel); + vol->VolCatWrites, edit_uint64(vol->VolCatMaxBytes, ed2), + EndTime, vol->VolCatStatus, vol->Slot, relabel); Dmsg1(120, "update_volume_data(): %s", dir->msg); + unbash_spaces(vol->VolCatName); if (bnet_recv(dir) <= 0) { Dmsg0(190, "updateVolCatInfo error bnet_recv\n"); + Jmsg(jcr, M_ERROR, 0, _("Error updating Volume Info: %s\n"), + bnet_strerror(dir)); return 0; } Dmsg1(120, "Updatevol: %s", dir->msg); @@ -183,11 +188,13 @@ int dir_create_jobmedia_record(JCR *jcr) bnet_fsend(dir, Create_job_media, jcr->Job, jcr->VolFirstFile, jcr->JobFiles, - jcr->start_file, jcr->end_file, - jcr->start_block, jcr->end_block); + jcr->StartFile, jcr->EndFile, + jcr->StartBlock, jcr->EndBlock); Dmsg1(100, "create_jobmedia(): %s", dir->msg); if (bnet_recv(dir) <= 0) { Dmsg0(190, "create_jobmedia error bnet_recv\n"); + Jmsg(jcr, M_ERROR, 0, _("Error creating JobMedia record: %s\n"), + bnet_strerror(dir)); return 0; } Dmsg1(120, "Create_jobmedia: %s", dir->msg); @@ -242,9 +249,6 @@ int dir_update_file_attributes(JCR *jcr, DEV_RECORD *rec) */ int dir_ask_sysop_to_mount_next_volume(JCR *jcr, DEVICE *dev) { - struct timeval tv; - struct timezone tz; - struct timespec timeout; int stat = 0, jstat; /* ******FIXME******* put these on config variable */ int min_wait = 60 * 60; @@ -253,15 +257,13 @@ int dir_ask_sysop_to_mount_next_volume(JCR *jcr, DEVICE *dev) int wait_sec; int num_wait = 0; - int dev_blocked; - char *msg; Dmsg0(130, "enter dir_ask_sysop_to_mount_next_volume\n"); ASSERT(dev->dev_blocked); wait_sec = min_wait; for ( ;; ) { - if (job_cancelled(jcr)) { - Mmsg(&dev->errmsg, _("Job %s cancelled while waiting for mount on Storage Device \"%s\".\n"), + if (job_canceled(jcr)) { + Mmsg(&dev->errmsg, _("Job %s canceled while waiting for mount on Storage Device \"%s\".\n"), jcr->Job, jcr->dev_name); Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg); return 0; @@ -273,59 +275,34 @@ int dir_ask_sysop_to_mount_next_volume(JCR *jcr, DEVICE *dev) * removable media, return now, otherwise wait * for the operator to mount the media. */ - if (jcr->VolumeName[0] && !(dev->capabilities & CAP_REM) && - dev->capabilities & CAP_LABEL) { + if (jcr->VolumeName[0] && !dev_cap(dev, CAP_REM) && dev_cap(dev, CAP_LABEL)) { Dmsg0(190, "Return 1 from mount without wait.\n"); return 1; } - if (dev->capabilities & CAP_ANONVOLS) { - msg = "Suggest mounting"; - } else { - msg = "Please mount"; - } - Jmsg(jcr, M_MOUNT, 0, _("%s Volume \"%s\" on Storage Device \"%s\" for Job %s\n"), - msg, jcr->VolumeName, jcr->dev_name, jcr->Job); + Jmsg(jcr, M_MOUNT, 0, _( +"Please mount Volume \"%s\" on Storage Device \"%s\" for Job %s\n" +"Use \"mount\" command to release Job.\n"), + jcr->VolumeName, jcr->dev_name, jcr->Job); Dmsg3(190, "Mount %s on %s for Job %s\n", jcr->VolumeName, jcr->dev_name, jcr->Job); } else { jstat = JS_WaitMedia; Jmsg(jcr, M_MOUNT, 0, _( "Job %s waiting. Cannot find any appendable volumes.\n\ -Please use the \"label\" command to create new Volumes for:\n\ - Storage Device \"%s\" with Pool \"%s\" and Media type \"%s\".\n\ -Use \"mount\" to resume the job.\n"), - jcr->Job, jcr->dev_name, jcr->pool_name, jcr->media_type); +Please use the \"label\" command to create a new Volume for:\n\ + Storage: %s\n\ + Media type: %s\n\ + Pool: %s\n"), + jcr->Job, + jcr->dev_name, + jcr->media_type, + jcr->pool_name); } - /* - * Wait then send message again - */ - gettimeofday(&tv, &tz); - timeout.tv_nsec = tv.tv_usec * 1000; - timeout.tv_sec = tv.tv_sec + wait_sec; - P(dev->mutex); - dev_blocked = dev->dev_blocked; - dev->dev_blocked = BST_WAITING_FOR_SYSOP; /* indicate waiting for mount */ jcr->JobStatus = jstat; dir_send_job_status(jcr); - for ( ;!job_cancelled(jcr); ) { - Dmsg1(190, "I'm going to sleep on device %s\n", dev->dev_name); - stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex, &timeout); - if (dev->dev_blocked == BST_WAITING_FOR_SYSOP) { - break; - } - /* - * Someone other than us blocked the device (probably the - * user via the Console program. - * So, we continue waiting. - */ - gettimeofday(&tv, &tz); - timeout.tv_nsec = 0; - timeout.tv_sec = tv.tv_sec + 10; /* wait 10 seconds */ - } - dev->dev_blocked = dev_blocked; - V(dev->mutex); + stat = device_wait(jcr, dev, wait_sec); if (stat == ETIMEDOUT) { wait_sec *= 2; /* double wait time */ @@ -367,7 +344,7 @@ volumes for Job=%s.\n"), jcr->Job); } break; } - jcr->JobStatus = JS_Running; + set_jcr_job_status(jcr, JS_Running); dir_send_job_status(jcr); Dmsg0(130, "leave dir_ask_sysop_to_mount_next_volume\n"); return 1; @@ -393,11 +370,7 @@ int dir_ask_sysop_to_mount_volume(JCR *jcr, DEVICE *dev) int max_num_wait = 9; /* 5 waits =~ 1 day, then 1 day at a time */ int wait_sec; int num_wait = 0; - int dev_blocked; char *msg; - struct timeval tv; - struct timezone tz; - struct timespec timeout; Dmsg0(130, "enter dir_ask_sysop_to_mount_next_volume\n"); if (!jcr->VolumeName[0]) { @@ -407,8 +380,8 @@ int dir_ask_sysop_to_mount_volume(JCR *jcr, DEVICE *dev) ASSERT(dev->dev_blocked); wait_sec = min_wait; for ( ;; ) { - if (job_cancelled(jcr)) { - Mmsg(&dev->errmsg, _("Job %s cancelled while waiting for mount on Storage Device \"%s\".\n"), + if (job_canceled(jcr)) { + Mmsg(&dev->errmsg, _("Job %s canceled while waiting for mount on Storage Device \"%s\".\n"), jcr->Job, jcr->dev_name); return 0; } @@ -418,36 +391,10 @@ int dir_ask_sysop_to_mount_volume(JCR *jcr, DEVICE *dev) Dmsg3(190, "Mount %s on %s for Job %s\n", jcr->VolumeName, jcr->dev_name, jcr->Job); - /* - * Wait then send message again - */ - gettimeofday(&tv, &tz); - timeout.tv_nsec = tv.tv_usec * 1000; - timeout.tv_sec = tv.tv_sec + wait_sec; - - P(dev->mutex); - dev_blocked = dev->dev_blocked; - dev->dev_blocked = BST_WAITING_FOR_SYSOP; /* indicate waiting for mount */ jcr->JobStatus = JS_WaitMount; dir_send_job_status(jcr); - for ( ;!job_cancelled(jcr); ) { - Dmsg1(190, "I'm going to sleep on device %s\n", dev->dev_name); - stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex, &timeout); - if (dev->dev_blocked == BST_WAITING_FOR_SYSOP) { - break; - } - /* - * Someone other than us blocked the device (probably the - * user via the Console program. - * So, we continue waiting. - */ - gettimeofday(&tv, &tz); - timeout.tv_nsec = 0; - timeout.tv_sec = tv.tv_sec + 10; /* wait 10 seconds */ - } - dev->dev_blocked = dev_blocked; - V(dev->mutex); + stat = device_wait(jcr, dev, wait_sec); /* wait on device */ if (stat == ETIMEDOUT) { wait_sec *= 2; /* double wait time */ @@ -481,8 +428,84 @@ int dir_ask_sysop_to_mount_volume(JCR *jcr, DEVICE *dev) num_wait = 0; break; } - jcr->JobStatus = JS_Running; + set_jcr_job_status(jcr, JS_Running); dir_send_job_status(jcr); Dmsg0(130, "leave dir_ask_sysop_to_mount_next_volume\n"); return 1; } + +#define HB_TIME 20*60 /* send a heatbeat once every 20 minutes while waiting */ + +static int device_wait(JCR *jcr, DEVICE *dev, int wait_sec) +{ + struct timeval tv; + struct timezone tz; + struct timespec timeout; + int dev_blocked; + time_t start = time(NULL); + time_t last_heartbeat = 0; + int stat = 0; + + /* + * Wait requested time (wait_sec). However, we also wake up every + * HB_TIME seconds and send a heartbeat to the FD and the Director + * to keep stateful firewalls from closing them down while waiting + * for the operator. + */ + gettimeofday(&tv, &tz); + timeout.tv_nsec = tv.tv_usec * 1000; + timeout.tv_sec = tv.tv_sec + (wait_sec > HB_TIME ? HB_TIME: wait_sec); + + P(dev->mutex); + dev_blocked = dev->dev_blocked; + dev->dev_blocked = BST_WAITING_FOR_SYSOP; /* indicate waiting for mount */ + + for ( ; !job_canceled(jcr); ) { + int add_wait; + + Dmsg1(190, "I'm going to sleep on device %s\n", dev->dev_name); + stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->mutex, &timeout); + + /* Note, this always triggers the first time. We want that. */ + time_t now = time(NULL); + if (now - last_heartbeat >= HB_TIME) { + /* send heartbeats */ + if (jcr->file_bsock) { + bnet_sig(jcr->file_bsock, BNET_HEARTBEAT); + } + if (jcr->dir_bsock) { + bnet_sig(jcr->dir_bsock, BNET_HEARTBEAT); + } + last_heartbeat = now; + } + + /* Check if we blocked the device */ + if (dev->dev_blocked == BST_WAITING_FOR_SYSOP) { + if (stat != ETIMEDOUT) { /* we blocked the device */ + break; /* on error return */ + } + if (now - start >= wait_sec) { /* on exceeding wait time return */ + break; + } + add_wait = wait_sec - (now - start); + if (add_wait > HB_TIME) { + add_wait = HB_TIME; + } + } else { /* Oops someone else has it blocked now */ + add_wait = 10; /* hang around until he releases it */ + } + /* + * Note, if dev_blocked is not BST_WAITING FOR_SYSOP, + * someone other than us has blocked the device (probably the + * user via the Console program), so we continue waiting + * until he releases the device back to us. + */ + gettimeofday(&tv, &tz); + timeout.tv_nsec = tv.tv_usec * 1000; + timeout.tv_sec = tv.tv_sec + add_wait; /* additional wait */ + } + + dev->dev_blocked = dev_blocked; + V(dev->mutex); + return stat; +}