git.sur5r.net Git - bacula/bacula/blob - bacula/src/dird/vbackup.c

   1 /*
   2    Bacula® - The Network Backup Solution
   3
   4    Copyright (C) 2008-2008 Free Software Foundation Europe e.V.
   5
   6    The main author of Bacula is Kern Sibbald, with contributions from
   7    many others, a complete list can be found in the file AUTHORS.
   8    This program is Free Software; you can redistribute it and/or
   9    modify it under the terms of version two of the GNU General Public
  10    License as published by the Free Software Foundation and included
  11    in the file LICENSE.
  12
  13    This program is distributed in the hope that it will be useful, but
  14    WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16    General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  21    02110-1301, USA.
  22
  23    Bacula® is a registered trademark of Kern Sibbald.
  24    The licensor of Bacula is the Free Software Foundation Europe
  25    (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
  26    Switzerland, email:ftf@fsfeurope.org.
  27 */
  28 /*
  29  *
  30  *   Bacula Director -- vbackup.c -- responsible for doing virtual
  31  *     backup jobs or in other words, consolidation or synthetic
  32  *     backups.
  33  *
  34  *     Kern Sibbald, July MMVIII
  35  *
  36  *  Basic tasks done here:
  37  *     Open DB and create records for this job.
  38  *     Figure out what Jobs to copy.
  39  *     Open Message Channel with Storage daemon to tell him a job will be starting.
  40  *     Send the bootstrap file to the Storage daemon and tell him
  41  *       to run the job.
  42  *     When the Storage daemon finishes the job, update the DB.
  43  *
  44  *   Version $Id: $
  45  */
  46
  47 #include "bacula.h"
  48 #include "dird.h"
  49 #include "ua.h"
  50
  51 static const int dbglevel = 10;
  52
  53 static char OKbootstrap[] = "3000 OK bootstrap\n";
  54
  55 static bool create_bootstrap_file(JCR *jcr, POOLMEM *jobids);
  56 void vbackup_cleanup(JCR *jcr, int TermCode);
  57
  58 /*
  59  * Called here before the job is run to do the job
  60  *   specific setup.
  61  */
  62 bool do_vbackup_init(JCR *jcr)
  63 {
  64    /* ***FIXME*** remove when implemented in job.c */
  65    if (!jcr->rpool_source) {
  66       jcr->rpool_source = get_pool_memory(PM_MESSAGE);
  67       pm_strcpy(jcr->rpool_source, _("unknown source"));
  68    }
  69
  70    if (!get_or_create_fileset_record(jcr)) {
  71       Dmsg1(dbglevel, "JobId=%d no FileSet\n", (int)jcr->JobId);
  72       return false;
  73    }
  74
  75    apply_pool_overrides(jcr);
  76
  77    if (!allow_duplicate_job(jcr)) {
  78       return false;
  79    }
  80
  81    /*
  82     * Note, at this point, pool is the pool for this job.  We
  83     *  transfer it to rpool (read pool), and a bit later,
  84     *  pool will be changed to point to the write pool,
  85     *  which comes from pool->NextPool.
  86     */
  87    jcr->rpool = jcr->pool;            /* save read pool */
  88    pm_strcpy(jcr->rpool_source, jcr->pool_source);
  89
  90
  91    Dmsg2(dbglevel, "Read pool=%s (From %s)\n", jcr->rpool->name(), jcr->rpool_source);
  92
  93    POOLMEM *jobids = get_pool_memory(PM_FNAME);
  94    db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
  95    Dmsg1(000, "Accurate jobids=%s\n", jobids);
  96    if (*jobids == 0) {
  97       free_pool_memory(jobids);
  98       Jmsg(jcr, M_FATAL, 0, _("Cannot find previous JobIds.\n"));
  99       return false;
 100    }
 101
 102    if (!create_bootstrap_file(jcr, jobids)) {
 103       Jmsg(jcr, M_FATAL, 0, _("Could not get or create the FileSet record.\n"));
 104       free_pool_memory(jobids);
 105       return false;
 106    }
 107    free_pool_memory(jobids);
 108
 109    /*
 110     * If the original backup pool has a NextPool, make sure a
 111     *  record exists in the database. Note, in this case, we
 112     *  will be backing up from pool to pool->NextPool.
 113     */
 114    if (jcr->pool->NextPool) {
 115       jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->NextPool->name());
 116       if (jcr->jr.PoolId == 0) {
 117          return false;
 118       }
 119    }
 120    /* ***FIXME*** this is probably not needed */
 121    if (!set_migration_wstorage(jcr, jcr->pool)) {
 122       return false;
 123    }
 124    pm_strcpy(jcr->pool_source, _("Job Pool's NextPool resource"));
 125
 126    Dmsg2(dbglevel, "Write pool=%s read rpool=%s\n", jcr->pool->name(), jcr->rpool->name());
 127
 128    create_clones(jcr);
 129
 130    return true;
 131 }
 132
 133 /*
 134  * Do a backup of the specified FileSet
 135  *
 136  *  Returns:  false on failure
 137  *            true  on success
 138  */
 139 bool do_vbackup(JCR *jcr)
 140 {
 141    char ed1[100];
 142    BSOCK *sd;
 143    JCR *mig_jcr = jcr->mig_jcr;    /* newly backed up job */
 144
 145    /*
 146     * If mig_jcr is NULL, there is nothing to do for this job,
 147     *  so set a normal status, cleanup and return OK.
 148     */
 149    if (!mig_jcr) {
 150       set_jcr_job_status(jcr, JS_Terminated);
 151       vbackup_cleanup(jcr, jcr->JobStatus);
 152       return true;
 153    }
 154
 155    /* Print Job Start message */
 156    Jmsg(jcr, M_INFO, 0, _("Start Vbackup JobId %s, Job=%s\n"),
 157         edit_uint64(jcr->JobId, ed1), jcr->Job);
 158
 159
 160
 161    /*
 162     * Open a message channel connection with the Storage
 163     * daemon. This is to let him know that our client
 164     * will be contacting him for a backup  session.
 165     *
 166     */
 167    Dmsg0(110, "Open connection with storage daemon\n");
 168    set_jcr_job_status(jcr, JS_WaitSD);
 169    set_jcr_job_status(mig_jcr, JS_WaitSD);
 170    /*
 171     * Start conversation with Storage daemon
 172     */
 173    if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
 174       return false;
 175    }
 176    sd = jcr->store_bsock;
 177    /*
 178     * Now start a job with the Storage daemon
 179     */
 180    Dmsg2(dbglevel, "Read store=%s, write store=%s\n",
 181       ((STORE *)jcr->rstorage->first())->name(),
 182       ((STORE *)jcr->wstorage->first())->name());
 183    if (((STORE *)jcr->rstorage->first())->name() == ((STORE *)jcr->wstorage->first())->name()) {
 184       Jmsg(jcr, M_FATAL, 0, _("Read storage \"%s\" same as write storage.\n"),
 185            ((STORE *)jcr->rstorage->first())->name());
 186       return false;
 187    }
 188    if (!start_storage_daemon_job(jcr, jcr->rstorage, jcr->wstorage)) {
 189       return false;
 190    }
 191    Dmsg0(150, "Storage daemon connection OK\n");
 192
 193    if (!send_bootstrap_file(jcr, sd) ||
 194        !response(jcr, sd, OKbootstrap, "Bootstrap", DISPLAY_ERROR)) {
 195       return false;
 196    }
 197
 198    /*
 199     * We re-update the job start record so that the start
 200     *  time is set after the run before job.  This avoids
 201     *  that any files created by the run before job will
 202     *  be saved twice.  They will be backed up in the current
 203     *  job, but not in the next one unless they are changed.
 204     *  Without this, they will be backed up in this job and
 205     *  in the next job run because in that case, their date
 206     *   is after the start of this run.
 207     */
 208    jcr->start_time = time(NULL);
 209    jcr->jr.StartTime = jcr->start_time;
 210    jcr->jr.JobTDate = jcr->start_time;
 211    set_jcr_job_status(jcr, JS_Running);
 212
 213    /* Update job start record for this migration control job */
 214    if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
 215       Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
 216       return false;
 217    }
 218
 219
 220    mig_jcr->start_time = time(NULL);
 221    mig_jcr->jr.StartTime = mig_jcr->start_time;
 222    mig_jcr->jr.JobTDate = mig_jcr->start_time;
 223    set_jcr_job_status(mig_jcr, JS_Running);
 224
 225    /* Update job start record for the real migration backup job */
 226    if (!db_update_job_start_record(mig_jcr, mig_jcr->db, &mig_jcr->jr)) {
 227       Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(mig_jcr->db));
 228       return false;
 229    }
 230
 231    Dmsg4(dbglevel, "mig_jcr: Name=%s JobId=%d Type=%c Level=%c\n",
 232       mig_jcr->jr.Name, (int)mig_jcr->jr.JobId,
 233       mig_jcr->jr.JobType, mig_jcr->jr.JobLevel);
 234
 235
 236    /*
 237     * Start the job prior to starting the message thread below
 238     * to avoid two threads from using the BSOCK structure at
 239     * the same time.
 240     */
 241    if (!sd->fsend("run")) {
 242       return false;
 243    }
 244
 245    /*
 246     * Now start a Storage daemon message thread
 247     */
 248    if (!start_storage_daemon_message_thread(jcr)) {
 249       return false;
 250    }
 251
 252
 253    set_jcr_job_status(jcr, JS_Running);
 254    set_jcr_job_status(mig_jcr, JS_Running);
 255
 256    /* Pickup Job termination data */
 257    /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
 258    wait_for_storage_daemon_termination(jcr);
 259    set_jcr_job_status(jcr, jcr->SDJobStatus);
 260    db_write_batch_file_records(jcr);    /* used by bulk batch file insert */
 261    if (jcr->JobStatus != JS_Terminated) {
 262       return false;
 263    }
 264
 265    vbackup_cleanup(jcr, jcr->JobStatus);
 266    if (mig_jcr) {
 267       char jobid[50];
 268       UAContext *ua = new_ua_context(jcr);
 269       edit_uint64(jcr->previous_jr.JobId, jobid);
 270       /* Purge all old file records, but leave Job record */
 271       purge_files_from_jobs(ua, jobid);
 272       free_ua_context(ua);
 273    }
 274    return true;
 275 }
 276
 277
 278 /*
 279  * Release resources allocated during backup.
 280  */
 281 void vbackup_cleanup(JCR *jcr, int TermCode)
 282 {
 283    char sdt[50], edt[50], schedt[50];
 284    char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
 285    char ec6[30], ec7[30], ec8[30], elapsed[50];
 286    char term_code[100], fd_term_msg[100], sd_term_msg[100];
 287    const char *term_msg;
 288    int msg_type = M_INFO;
 289    MEDIA_DBR mr;
 290    CLIENT_DBR cr;
 291    double kbps, compression;
 292    utime_t RunTime;
 293
 294    Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
 295    memset(&mr, 0, sizeof(mr));
 296    memset(&cr, 0, sizeof(cr));
 297
 298    update_job_end(jcr, TermCode);
 299
 300    if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
 301       Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
 302          db_strerror(jcr->db));
 303       set_jcr_job_status(jcr, JS_ErrorTerminated);
 304    }
 305
 306    bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
 307    if (!db_get_client_record(jcr, jcr->db, &cr)) {
 308       Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
 309          db_strerror(jcr->db));
 310    }
 311
 312    bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
 313    if (!db_get_media_record(jcr, jcr->db, &mr)) {
 314       Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
 315          mr.VolumeName, db_strerror(jcr->db));
 316       set_jcr_job_status(jcr, JS_ErrorTerminated);
 317    }
 318
 319    update_bootstrap_file(jcr);
 320
 321    switch (jcr->JobStatus) {
 322       case JS_Terminated:
 323          if (jcr->Errors || jcr->SDErrors) {
 324             term_msg = _("Backup OK -- with warnings");
 325          } else {
 326             term_msg = _("Backup OK");
 327          }
 328          break;
 329       case JS_FatalError:
 330       case JS_ErrorTerminated:
 331          term_msg = _("*** Backup Error ***");
 332          msg_type = M_ERROR;          /* Generate error message */
 333          if (jcr->store_bsock) {
 334             jcr->store_bsock->signal(BNET_TERMINATE);
 335             if (jcr->SD_msg_chan) {
 336                pthread_cancel(jcr->SD_msg_chan);
 337             }
 338          }
 339          break;
 340       case JS_Canceled:
 341          term_msg = _("Backup Canceled");
 342          if (jcr->store_bsock) {
 343             jcr->store_bsock->signal(BNET_TERMINATE);
 344             if (jcr->SD_msg_chan) {
 345                pthread_cancel(jcr->SD_msg_chan);
 346             }
 347          }
 348          break;
 349       default:
 350          term_msg = term_code;
 351          sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
 352          break;
 353    }
 354    bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
 355    bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
 356    bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
 357    RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
 358    if (RunTime <= 0) {
 359       kbps = 0;
 360    } else {
 361       kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
 362    }
 363    if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
 364       /*
 365        * Note, if the job has erred, most likely it did not write any
 366        *  tape, so suppress this "error" message since in that case
 367        *  it is normal.  Or look at it the other way, only for a
 368        *  normal exit should we complain about this error.
 369        */
 370       if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
 371          Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
 372       }
 373       jcr->VolumeName[0] = 0;         /* none */
 374    }
 375
 376    if (jcr->ReadBytes == 0) {
 377       bstrncpy(compress, "None", sizeof(compress));
 378    } else {
 379       compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
 380       if (compression < 0.5) {
 381          bstrncpy(compress, "None", sizeof(compress));
 382       } else {
 383          bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
 384       }
 385    }
 386    jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
 387    jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
 388
 389 // bmicrosleep(15, 0);                /* for debugging SIGHUP */
 390
 391    Jmsg(jcr, msg_type, 0, _("Bacula %s %s (%s): %s\n"
 392 "  Build OS:               %s %s %s\n"
 393 "  JobId:                  %d\n"
 394 "  Job:                    %s\n"
 395 "  Backup Level:           %s%s\n"
 396 "  Client:                 \"%s\" %s\n"
 397 "  FileSet:                \"%s\" %s\n"
 398 "  Pool:                   \"%s\" (From %s)\n"
 399 "  Catalog:                \"%s\" (From %s)\n"
 400 "  Storage:                \"%s\" (From %s)\n"
 401 "  Scheduled time:         %s\n"
 402 "  Start time:             %s\n"
 403 "  End time:               %s\n"
 404 "  Elapsed time:           %s\n"
 405 "  Priority:               %d\n"
 406 "  FD Files Written:       %s\n"
 407 "  SD Files Written:       %s\n"
 408 "  FD Bytes Written:       %s (%sB)\n"
 409 "  SD Bytes Written:       %s (%sB)\n"
 410 "  Rate:                   %.1f KB/s\n"
 411 "  Software Compression:   %s\n"
 412 "  VSS:                    %s\n"
 413 "  Encryption:             %s\n"
 414 "  Accurate:               %s\n"
 415 "  Volume name(s):         %s\n"
 416 "  Volume Session Id:      %d\n"
 417 "  Volume Session Time:    %d\n"
 418 "  Last Volume Bytes:      %s (%sB)\n"
 419 "  Non-fatal FD errors:    %d\n"
 420 "  SD Errors:              %d\n"
 421 "  FD termination status:  %s\n"
 422 "  SD termination status:  %s\n"
 423 "  Termination:            %s\n\n"),
 424         my_name, VERSION, LSMDATE, edt,
 425         HOST_OS, DISTNAME, DISTVER,
 426         jcr->jr.JobId,
 427         jcr->jr.Job,
 428         level_to_str(jcr->JobLevel), jcr->since,
 429         jcr->client->name(), cr.Uname,
 430         jcr->fileset->name(), jcr->FSCreateTime,
 431         jcr->pool->name(), jcr->pool_source,
 432         jcr->catalog->name(), jcr->catalog_source,
 433         jcr->wstore->name(), jcr->wstore_source,
 434         schedt,
 435         sdt,
 436         edt,
 437         edit_utime(RunTime, elapsed, sizeof(elapsed)),
 438         jcr->JobPriority,
 439         edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
 440         edit_uint64_with_commas(jcr->SDJobFiles, ec2),
 441         edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
 442         edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
 443         edit_uint64_with_commas(jcr->SDJobBytes, ec5),
 444         edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
 445         kbps,
 446         compress,
 447         jcr->VSS?_("yes"):_("no"),
 448         jcr->Encrypt?_("yes"):_("no"),
 449         jcr->accurate?_("yes"):_("no"),
 450         jcr->VolumeName,
 451         jcr->VolSessionId,
 452         jcr->VolSessionTime,
 453         edit_uint64_with_commas(mr.VolBytes, ec7),
 454         edit_uint64_with_suffix(mr.VolBytes, ec8),
 455         jcr->Errors,
 456         jcr->SDErrors,
 457         fd_term_msg,
 458         sd_term_msg,
 459         term_msg);
 460
 461    Dmsg0(100, "Leave vbackup_cleanup()\n");
 462 }
 463
 464 /*
 465  * This callback routine is responsible for inserting the
 466  *  items it gets into the bootstrap structure. For each JobId selected
 467  *  this routine is called once for each file. We do not allow
 468  *  duplicate filenames, but instead keep the info from the most
 469  *  recent file entered (i.e. the JobIds are assumed to be sorted)
 470  *
 471  *   See uar_sel_files in sql_cmds.c for query that calls us.
 472  *      row[0]=Path, row[1]=Filename, row[2]=FileIndex
 473  *      row[3]=JobId row[4]=LStat
 474  */
 475 int insert_bootstrap_handler(void *ctx, int num_fields, char **row)
 476 {
 477    JobId_t JobId;
 478    int FileIndex;
 479    RBSR *bsr = (RBSR *)ctx;
 480
 481    JobId = str_to_int64(row[3]);
 482    FileIndex = str_to_int64(row[2]);
 483    add_findex(bsr, JobId, FileIndex);
 484    return 0;
 485 }
 486
 487
 488 static bool create_bootstrap_file(JCR *jcr, POOLMEM *jobids)
 489 {
 490    RESTORE_CTX rx;
 491    UAContext *ua;
 492
 493    memset(&rx, 0, sizeof(rx));
 494    rx.bsr = new_bsr();
 495    ua = new_ua_context(jcr);
 496    rx.JobIds = jobids;
 497
 498 #define new_get_file_list
 499 #ifdef new_get_file_list
 500    if (!db_get_file_list(jcr, ua->db, jobids, insert_bootstrap_handler, (void *)rx.bsr)) {
 501       Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(ua->db));
 502    }
 503 #else
 504    char *p;
 505    JobId_t JobId, last_JobId = 0;
 506    rx.query = get_pool_memory(PM_MESSAGE);
 507    for (p=rx.JobIds; get_next_jobid_from_list(&p, &JobId) > 0; ) {
 508       char ed1[50];
 509
 510       if (JobId == last_JobId) {
 511          continue;                    /* eliminate duplicate JobIds */
 512       }
 513       last_JobId = JobId;
 514       /*
 515        * Find files for this JobId and insert them in the tree
 516        */
 517       Mmsg(rx.query, uar_sel_files, edit_int64(JobId, ed1));
 518       Dmsg1(000, "uar_sel_files=%s\n", rx.query);
 519       if (!db_sql_query(ua->db, rx.query, insert_bootstrap_handler, (void *)rx.bsr)) {
 520          Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(ua->db));
 521       }
 522       free_pool_memory(rx.query);
 523       rx.query = NULL;
 524    }
 525 #endif
 526
 527    complete_bsr(ua, rx.bsr);
 528    Dmsg0(000, "Print bsr\n");
 529    print_bsr(ua, rx.bsr);
 530
 531    jcr->ExpectedFiles = write_bsr_file(ua, rx);
 532    Dmsg1(000, "Found %d files to consolidate.\n", jcr->ExpectedFiles);
 533    if (jcr->ExpectedFiles == 0) {
 534       free_ua_context(ua);
 535       free_bsr(rx.bsr);
 536       return false;
 537    }
 538    free_ua_context(ua);
 539    free_bsr(rx.bsr);
 540    return true;
 541 }